#! /usr/bin/env python3
# -*- coding: utf-8 -*-
#
# flo-list-movies --- List movies found in local or remote directories
#
# Copyright (c) 2008, 2013, 2014 Florent Rougon
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA  02110-1301 USA.

import sys, locale, os, stat, getopt, re, subprocess, tempfile, textwrap, \
    shlex, traceback
try:
    import xml.etree.ElementTree as et
except ImportError:
    import elementtree.ElementTree as et
import xml.parsers.expat                # for xml.parsers.expat.error

import flo_small_funcs


# Compiled regular expressions applied to file basenames to recognize
# movies when a location does not provide its own "regexps" entry. The
# match is case-insensitive.
default_movie_regexps = (
    re.compile(
        r".*\.(mkv|webm|avi|og[vm]|flv|mov|ts|mp4|wmv|mpe?g|m2t)$",
        flags=re.IGNORECASE),
)

# Name under which the program was invoked; used in messages, in the
# default configuration file paths and as temporary file prefix.
progname = os.path.basename(sys.argv[0])
progversion = "0.6"
# Text printed after the program name and version by --version.
version_blurb = """Written by Florent Rougon.

Copyright (c) 2008, 2014  Florent Rougon
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""

# Help text printed by --help and after an incorrect invocation. The
# description of --shell-quote used to end with a stray "after every file
# name" line that did not belong to any option; it has been removed.
usage = """Usage: {progname} [option ...]
List movies found in local or remote directories.

Options:
      --config-file=FILE       use FILE instead of the default configuration
                               files, /etc/{progname}/config.py and
                               ~/.{progname}/config.py (read in this order
                               unless --config-file is supplied)
  -d, --print-dirname          print the directory where each file was found
  -f, --print-fullpath         print the full path of each file
      --dont-print-location    don't print the location name (in brackets)
  -s, --shell-quote            quote the paths printed by options
                               --print-dirname and --print-fullpath according
                               to POSIX shell conventions
  -n, --no-comments            don't print the comments about movies found
                               in XML files
      --help                   display this message and exit
      --version                output version information and exit""".format(
    progname=progname)

# Program-wide settings, read by several functions of this module. It stays
# None until main() rebinds it to the second element of the tuple returned
# by process_command_line_and_config_file().
params = None

# Exceptions raised by this module
class error(Exception):
    """Root of the exception hierarchy of flo-list-movies.

    Subclasses normally only override ExceptionShortDescription, the
    text that introduces the optional MESSAGE in the string form of
    the exception.

    """
    ExceptionShortDescription = "{0} generic exception".format(progname)

    def __init__(self, message=None):
        self.message = message

    def complete_message(self):
        """Return the short description, followed by the message if any."""
        description = self.ExceptionShortDescription
        if not self.message:
            return description

        return "{0}: {1}".format(description, self.message)

    def __str__(self):
        return self.complete_message()

    def __repr__(self):
        class_name = type(self).__name__
        return "{0}.{1}({2!r})".format(__name__, class_name, self.message)

class NoSuchConfigurationFile(error):
    """Raised when the user-specified configuration file doesn't exist.

    process_command_line_and_config_file() raises it with the path given
    to --config-file as message.

    """
    # Text placed before the message by error.complete_message()
    ExceptionShortDescription = "No such configuration file"

class UserError(error):
    """Exception raised when the program is used incorrectly.

    Base class for more specific invocation errors.

    """
    # Text placed before the message by error.complete_message()
    ExceptionShortDescription = "User error"

class SeveralConfigFileOptionsSupplied(UserError):
    """Exception raised when several --config-file options were supplied.

    It is raised without a message, so its string form is just the
    short description below.

    """
    # Text used as the whole string form when no message is given
    ExceptionShortDescription = "Several --config-file options were supplied"


class ErrorList(dict):
    """Error messages grouped by location name, then by item.

    The structure is a two-level mapping:

      location name -> item -> list of error messages

    where an item is whatever the error relates to inside the location
    (in this program: a base directory or the path of an XML file).

    """

    def append(self, location_name, item, error):
        """Record a single ERROR for ITEM in location LOCATION_NAME."""
        self.setdefault(location_name, {}).setdefault(item, []).append(error)

    def extend(self, location_name, item, errors):
        """Record every element of ERRORS for ITEM in LOCATION_NAME.

        Nothing is created in the mapping when ERRORS is empty.

        """
        if not errors:
            return

        self.setdefault(location_name, {}).setdefault(item, []).extend(errors)

    def cleanup(self):
        """Remove items without errors, then locations without items."""
        # Iterate over snapshots of the keys: deleting from a dictionary
        # while iterating over it raises RuntimeError in Python 3.
        for location_name in list(self):
            d = self[location_name]
            for item in [ i for i, errors in d.items() if not errors ]:
                del d[item]

            if not d:
                del self[location_name]

    def count(self):
        """Return the total number of error messages stored."""
        return sum(len(errors)
                   for d in self.values()
                   for errors in d.values())


class MovieMetadata(dict):
    """Dictionary describing a movie listed in an XML file.

    It is constructed exactly like a plain dict; the dedicated class
    lets format_output() distinguish such entries from MovieFile ones.

    """

class MovieFile(dict):
    """Dictionary describing a movie file found in a directory location.

    It is constructed exactly like a plain dict; the dedicated class
    lets format_output() distinguish such entries from MovieMetadata
    ones.

    """


def prefix_lines(s, prefix):
    """Return S with PREFIX inserted at the beginning of every line.

    Contrary to the default behavior of textwrap.indent(), lines that
    are empty or consist solely of whitespace are prefixed too.

    """
    def every_line(line):
        return True

    return textwrap.indent(s, prefix, predicate=every_line)


def err(msg):
    """Return MSG with "<program name>: " prepended to each of its lines."""
    return prefix_lines(msg, progname + ": ")


def os_walk_dumb_error_handling(exception):
    """Re-raise EXCEPTION.

    This function is passed as the 'onerror' argument of os.walk() in
    find_files_under_local_dir(), so that an error reported by os.walk()
    is raised instead of being handed to a handler that ignores it.

    """
    raise exception


def execute_remote_command(remote_user, remote_host, args, *,
                           binary_output=False):
    """Run a command on a remote host via ssh and capture its output.

    ARGS is the remote command as a list of arguments; it is run as
    REMOTE_USER on REMOTE_HOST. The standard output of ssh is collected
    in a temporary file and returned as bytes if BINARY_OUTPUT is true,
    as str otherwise.

    Return a tuple (EXEC_OK, RETCODE, OUTPUT, ERRORS) where:
      - EXEC_OK is False if ssh could not be started, exited with
        status 255 or was terminated by a signal, and True otherwise
        (including when RETCODE is a non-zero status other than 255:
        interpreting it is left to the caller);
      - RETCODE is the exit status of the ssh process, or None if it
        could not be started;
      - OUTPUT is the captured standard output; it is empty whenever
        EXEC_OK is False;
      - ERRORS is a list of messages, containing one entry per line
        written by ssh to its standard error, plus a description of
        the failure if EXEC_OK is False.

    """
    command = [ "ssh", "-l", remote_user, remote_host ]
    # ssh (observed with version 4.3p2) does not preserve argument
    # boundaries: an argument containing spaces ends up split into several
    # arguments on the remote side. Therefore, every argument has to be
    # quoted in the shell manner.
    command.extend(map(shlex.quote, args))

    errors = []
    if binary_output:
        # Keep the type of the placeholder consistent with what is returned
        # on success.
        output = b""
        mode = "w+b"
        buffering = -1
    else:
        output = ""
        mode = "w+t"
        buffering = 1

    with tempfile.TemporaryFile(mode=mode, buffering=buffering) as tmpfile:
        try:
            # Using Popen as a context manager ensures that the stderr pipe
            # is closed when we are done with the child process.
            with subprocess.Popen(command, shell=False,
                                  universal_newlines=True,
                                  stdout=tmpfile, stderr=subprocess.PIPE,
                                  close_fds=True) as p:
                for line in p.stderr:
                    if line.endswith("\n"):
                        line = line[:-1]
                    errors.append(
                        "[stderr of ssh child process] %s" % (line,))

                retcode = p.wait()
        except OSError as e:
            errors.append(
                "error while trying to execute the following ssh command:\n"
                "  command: %s\n"
                "  error message: %s" % (command, e.strerror))
            return (False, None, output, errors)

        if retcode == 255:
            # Exit status treated as an error of ssh itself
            errors.append(
                "ssh error while trying to execute a command on remote host %s:\n"
                "  command was: %s" % (remote_host, command))
            return (False, retcode, output, errors)
        elif retcode < 0:
            # Popen reports termination by signal N as return code -N
            errors.append(
                "ssh command on remote host %s was terminated by signal %d:\n"
                "  command was: %s" % (remote_host, -retcode, command))
            return (False, retcode, output, errors)

        # Any other non-zero exit status is not considered an execution
        # failure here; the caller decides what it means.

        # The child process wrote directly to the file descriptor, so rewind
        # before reading what it produced.
        tmpfile.flush()
        tmpfile.seek(0, 0)
        output = tmpfile.read()

    return (True, retcode, output, errors)


def scp(args):
    """Run 'scp -q' with the arguments ARGS and capture its output.

    ARGS is the list of arguments (sources followed by the destination)
    appended to the 'scp -q' command.

    Return a tuple (EXEC_OK, RETCODE, OUTPUT, ERRORS) where:
      - EXEC_OK is True if scp could be started and exited with
        status 0, False otherwise;
      - RETCODE is the exit status of the scp process, or None if it
        could not be started;
      - OUTPUT is the standard output of scp (str); it is empty
        whenever EXEC_OK is False;
      - ERRORS is a list of messages, containing one entry per line
        written by scp to its standard error, plus a description of
        the failure if EXEC_OK is False.

    """
    command = [ "scp", "-q" ]
    command.extend(args)

    output = ""
    errors = []

    with tempfile.TemporaryFile(mode="w+t", buffering=1) as tmpfile:
        try:
            # Using Popen as a context manager ensures that the stderr pipe
            # is closed when we are done with the child process.
            with subprocess.Popen(command, shell=False,
                                  universal_newlines=True,
                                  stdout=tmpfile, stderr=subprocess.PIPE,
                                  close_fds=True) as p:
                for line in p.stderr:
                    if line.endswith("\n"):
                        line = line[:-1]
                    errors.append(
                        "[stderr of scp child process] %s" % (line,))

                retcode = p.wait()
        except OSError as e:
            errors.append(
                "error while trying to execute the following scp command:\n"
                "  command: %s\n"
                "  error message: %s" % (command, e.strerror))
            return (False, None, output, errors)

        if retcode > 0:
            errors.append(
                "scp error:\n"
                "  command was: %s" % (command,))
            return (False, retcode, output, errors)
        elif retcode < 0:
            # Popen reports termination by signal N as return code -N. This
            # function doesn't know any remote host name (the hosts are
            # embedded in ARGS), so the message only mentions the command.
            errors.append(
                "scp command was terminated by signal %d:\n"
                "  command was: %s" % (-retcode, command))
            return (False, retcode, output, errors)

        # The child process wrote directly to the file descriptor, so rewind
        # before reading what it produced.
        tmpfile.flush()
        tmpfile.seek(0, 0)
        output = tmpfile.read()

    return (True, retcode, output, errors)


def scp_files_from_same_account(remote_user, remote_host, paths, dest):
    """Copy remote PATHS, all from the same account, to DEST with scp.

    Every element of PATHS is turned into a 'user@host:path' source
    argument. Return a tuple (EXEC_OK, ERRORS) taken from the result of
    scp().

    """
    account = "%s@%s" % (remote_user, remote_host)
    sources = [ "%s:%s" % (account, path) for path in paths ]

    exec_ok, _retcode, _output, errors = scp(sources + [ dest ])

    return (exec_ok, errors)


def remote_directory_exists(remote_user, remote_host, directory):
    """Tell whether DIRECTORY is (or resolves to) a directory on a remote host.

    The check is done by running 'test -d DIRECTORY' as REMOTE_USER on
    REMOTE_HOST.

    Return a tuple (IS_DIR, ERRORS) where IS_DIR is True only if the
    remote command could be executed and exited with status 0, and
    ERRORS is a list of messages describing what was found or what
    went wrong.

    """
    # This tests if 'directory' is a directory or *resolves* to a directory
    # (follows symbolic links according to SUSv3)
    remote_command = [ "test", "-d", directory ]

    exec_ok, retcode, output, errors = \
             execute_remote_command(remote_user, remote_host, remote_command)

    if not exec_ok:
        return (False, errors)

    if retcode == 0:
        return (True, errors)
    elif retcode == 1:
        errors.append("is not, and does not resolve to a directory")
        return (False, errors)
    else:
        # The space before "exit status" was missing from this message.
        errors.append("remote test command on host %s returned a >1 "
                      "exit status (%d):\n"
                      "  command was: %s" % (remote_host, retcode,
                                             remote_command))
        return (False, errors)


def list_all_files_under_remote_dir(remote_user, remote_host, location_name,
                                    base_dir, prune_res, items_with_errors):
    """Return the paths of all regular files under a remote directory.

    The list is obtained by running 'find' under BASE_DIR as REMOTE_USER
    on REMOTE_HOST. PRUNE_RES is an iterable of regular expressions
    (strings, passed to find's -regex test with the posix-extended
    syntax); directories matching one of them are pruned.

    Errors are recorded in ITEMS_WITH_ERRORS for item BASE_DIR of
    location LOCATION_NAME. An empty list is returned if the command
    could not be executed or exited with a non-zero status.

    """
    remote_command = [ "find", base_dir, "-regextype", "posix-extended" ]
    for prune_re in prune_res:
        remote_command.extend(
            [ "-type", "d", "-regex", prune_re, "-prune", "-o" ])
    remote_command.extend([ "-type", "f", "-print0" ])

    exec_ok, retcode, output, errors = \
             execute_remote_command(remote_user, remote_host, remote_command,
                                    binary_output=True)

    # Record what was reported by execute_remote_command(), whether the
    # command succeeded or not.
    items_with_errors.extend(location_name, base_dir, errors)

    if not exec_ok:
        return []

    if retcode != 0:
        # 'errors' has already been recorded above: only add the new
        # message, otherwise every previous entry would be listed twice.
        items_with_errors.append(
            location_name, base_dir,
            "find command on host %s returned a non-zero "
            "exit status (%d):\n"
            "  command was: %s" % (remote_host, retcode, remote_command))
        return []

    # -print0 terminates every path with a NUL byte, so splitting leaves an
    # empty last element (and a single empty element when nothing was
    # found): skip empty names.
    files = [ name.decode(params["preferred encoding"])
              for name in output.split(b'\x00') if name ]

    return files


def find_files_under_remote_dir(remote_user, remote_host, location_name,
                                base_dir, prune_res, cregexps,
                                items_with_errors):
    """List remote files under BASE_DIR with basename matching some regexp.

    This function is a generator that yields a (dirname, basename) tuple
    for every file found under BASE_DIR on REMOTE_HOST whose basename
    matches at least one of the compiled regular expressions in iterable
    CREGEXPS. Nothing is yielded if BASE_DIR is not found to be a
    directory on the remote host.

    Errors are recorded in ITEMS_WITH_ERRORS for item BASE_DIR of
    location LOCATION_NAME.

    """
    is_dir_ok, errors = remote_directory_exists(remote_user, remote_host,
                                                base_dir)
    items_with_errors.extend(location_name, base_dir, errors)

    if is_dir_ok:
        remote_paths = list_all_files_under_remote_dir(
            remote_user, remote_host, location_name, base_dir, prune_res,
            items_with_errors)

        for full_path in remote_paths:
            dirname, basename = os.path.split(full_path)
            # A file is reported once, however many regexps it matches
            if any(( cre.match(basename) for cre in cregexps )):
                yield (dirname, basename)


def find_files_under_local_dir(location_name, base_dir, prune_res, cregexps,
                               items_with_errors):
    """List files under BASE_DIR with basename matching certain regular expressions.

    This function is a generator that yields a (dirpath, basename) tuple
    for every file under BASE_DIR for which the basename matches at
    least one of the compiled regular expressions in iterable CREGEXPS.

    PRUNE_RES is an iterable of regular expressions (strings).
    Directories whose path, as obtained by joining the path of the
    parent directory and the directory name, matches one of them (in
    the sense of re.match()) are not descended into.

    If BASE_DIR can't be examined, or is not and doesn't resolve to a
    directory, an error is recorded in ITEMS_WITH_ERRORS for item
    BASE_DIR of location LOCATION_NAME and nothing is yielded. Errors
    reported by os.walk() during the traversal are raised.

    """
    try:
        mode = os.stat(base_dir)[stat.ST_MODE]
    except OSError as e:
        items_with_errors.append(location_name, base_dir,
                                 "%s: %s" % (e.strerror, e.filename))
        return

    # This tests if 'base_dir' is a directory or *resolves* to a directory
    if not stat.S_ISDIR(mode):
        items_with_errors.append(location_name, base_dir,
                                "is not, and does not resolve to a directory")
        return

    prune_cres = [ re.compile(prune_re) for prune_re in prune_res ]

    for dirpath, dirnames, filenames in os.walk(
        base_dir, onerror=os_walk_dumb_error_handling):
        # Prune by rebuilding the list in place (os.walk() only takes
        # in-place modifications into account). Deleting elements while
        # enumerating the list, as was done before, skipped the directory
        # following each pruned one.
        dirnames[:] = [
            dirname for dirname in dirnames
            if not any(( prune_cre.match(os.path.join(dirpath, dirname))
                         for prune_cre in prune_cres )) ]

        for f in filenames:
            # A file is reported once, however many regexps it matches
            if any(( r.match(f) for r in cregexps )):
                yield (dirpath, f)


def find_files_in_dir_location(location_name, location, items_with_errors):
    """List the movie files found in a location of directories.

    This function is a generator that yields a tuple
    (base_dir, dirname, basename) for every matching file found under
    the directories of LOCATION. Each element of LOCATION["directories"]
    is either a string (the base directory) or a mapping with the keys
    "base dir" and "prune regexps". The regular expressions used are
    those of LOCATION["regexps"] if present, the default ones otherwise.

    Errors are recorded in ITEMS_WITH_ERRORS under LOCATION_NAME.

    """
    regexps = (location["regexps"] if "regexps" in location
               else params["default_regexps"])

    for elt in location["directories"]:
        if isinstance(elt, str):
            base_dir, prune_res = elt, []
        else:
            base_dir, prune_res = elt["base dir"], elt["prune regexps"]

        # Both finders are generators: nothing is searched before the
        # iteration below starts.
        if location["remote"]:
            found = find_files_under_remote_dir(
                location["remote user"],
                location["remote host"],
                location_name, base_dir, prune_res, regexps,
                items_with_errors)
        else:
            found = find_files_under_local_dir(
                location_name, base_dir, prune_res, regexps,
                items_with_errors)

        for dirname, basename in found:
            yield (base_dir, dirname, basename)


def find_movies_in_XMLfile(loc_name, filepath, items_with_errors,
                           orig_path=None):
    """List the movies described in an XML file.

    This function is a generator that yields a MovieMetadata instance
    for every video element found in the disc elements (children of the
    root element) of the XML file FILEPATH, which may be a path or a
    file object.

    LOC_NAME is the name of the location the file belongs to. ORIG_PATH,
    if supplied, is the name under which the file is reported (used for
    files copied from a remote host); it is stored as is under the
    "XML file" key of the yielded objects.

    Problems are recorded in ITEMS_WITH_ERRORS instead of being raised:
      - if the file can't be parsed, nothing is yielded;
      - a disc element whose "number" attribute is missing or not an
        integer is skipped with all its videos;
      - a video element without a title child is skipped.

    """
    reported_path = orig_path or filepath

    try:
        tree = et.ElementTree(file=filepath)
    except (et.ParseError, xml.parsers.expat.error) as e:
        # xml.etree.ElementTree raises ParseError, which is not a subclass
        # of xml.parsers.expat.error and gives the location of the problem
        # in its 'position' attribute. Errors coming directly from expat
        # have 'lineno' and 'offset' attributes instead.
        position = getattr(e, "position", None)
        if position is None:
            position = (e.lineno, e.offset)

        items_with_errors.append(loc_name, reported_path,
                                 "parse error at line %d, column %d"
                                 % tuple(position))
        return

    for count, disc_elt in enumerate(tree.getroot(), start=1):
        s = disc_elt.get("number")
        try:
            disc_number = int(s)
        except (TypeError, ValueError):
            # TypeError: the attribute is missing (s is None)
            items_with_errors.append(
                loc_name, reported_path,
                '"number" attribute of disc element number {} is not '
                'an integer: {!r}'.format(count, s))
            # Without a usable disc number, the videos of this disc can't
            # be described properly (the number is printed for each of
            # them), so skip the whole disc.
            continue

        for video_elt in disc_elt:
            title = video_elt.findtext("title")
            if title is None:
                items_with_errors.append(
                    loc_name, reported_path,
                    'video element without a "title" child element in '
                    'disc {}'.format(disc_number))
                continue

            # Missing comments are represented by the empty string
            comments = (video_elt.findtext("comments", None) or "").strip()

            mm = MovieMetadata(name=title.strip(),
                               disc_number=disc_number,
                               audio_version=video_elt.get("version", ""),
                               audio_lang=video_elt.get("audio", ""),
                               subtitles_lang=video_elt.get("subtitles", ""),
                               comments=comments)

            mm["location name"] = loc_name
            mm["XML file"] = orig_path
            yield mm


def find_movies_in_XMLfiles_location(loc_name, location, items_with_errors):
    """List the movies described in the XML files of a location.

    This function is a generator that yields what
    find_movies_in_XMLfile() yields for every file in LOCATION["files"].
    For a remote location, each file is first copied with scp to a
    temporary file, which is deleted once it has been processed; files
    that can't be copied are skipped.

    Errors are recorded in ITEMS_WITH_ERRORS under LOC_NAME.

    """
    # 'yield from' needs Python 3.3 or later, which this program requires
    # anyway (textwrap.indent() and shlex.quote() were added in 3.3).
    if not location["remote"]:
        for local_path in location["files"]:
            yield from find_movies_in_XMLfile(loc_name, local_path,
                                              items_with_errors)
        return

    for remote_path in location["files"]:
        user = location["remote user"]
        host = location["remote host"]
        orig_path = "%s@%s:%s" % (user, host, remote_path)

        with tempfile.NamedTemporaryFile(prefix=progname, suffix=".xml",
                                         delete=True) as tmpfile:
            # It is impossible to copy all XML files in one scp command,
            # because two identical basenames would cause one of the files
            # to be overwritten.
            exec_ok, errors = scp_files_from_same_account(
                user, host, (remote_path,), tmpfile.name)

            items_with_errors.extend(loc_name, orig_path, errors)

            if exec_ok:
                yield from find_movies_in_XMLfile(
                    loc_name, tmpfile, items_with_errors,
                    orig_path=orig_path)


def find_movies():
    """Gather the movies of all locations listed in params["locations"].

    Return a tuple (RESULTS, ITEMS_WITH_ERRORS) where RESULTS is a list
    of MovieFile instances (for locations of type "directories") and
    MovieMetadata instances (for locations of type "XML files"), and
    ITEMS_WITH_ERRORS is an ErrorList holding the errors encountered.

    The program is terminated with an error message if a location has
    an illegal "remote" value or an unknown type.

    """
    results = []
    items_with_errors = ErrorList()

    for loc_name, location in params["locations"].items():
        if location["remote"] not in (True, False):
            sys.exit(err("illegal value for \"remote\" attribute of "
                         "location '%s': %s"
                         % (loc_name, repr(location["remote"]))))

        loc_type = location["type"]

        if loc_type == "directories":
            found = find_files_in_dir_location(loc_name, location,
                                               items_with_errors)
            for base_dir, dirname, basename in found:
                # os.path.join(<dirname>, <basename>) gives the full path to
                # each file.
                results.append(MovieFile({ "location name": loc_name,
                                           "base directory": base_dir,
                                           "dirname": dirname,
                                           "name": basename }))
        elif loc_type == "XML files":
            results.extend(find_movies_in_XMLfiles_location(
                loc_name, location, items_with_errors))
        else:
            sys.exit(err("illegal location type for location '%s': '%s'"
                         % (loc_name, loc_type)))

    return (results, items_with_errors)


def format_output(movies, items_with_errors):
    """Print the errors encountered, then one line per movie.

    The errors stored in ITEMS_WITH_ERRORS, if any, are printed first as
    lines starting with '#', grouped by location and item. Then, for
    every element of MOVIES (MovieFile or MovieMetadata instances), a
    line is printed with the movie name followed by the optional pieces
    of information selected in 'params'.

    """
    def maybe_quoted(path):
        # The parameter is only looked up when a path is actually printed
        if params["shell_quote"]:
            return shlex.quote(path)
        return path

    if items_with_errors.count() > 0:
        items_with_errors.cleanup()
        print("# Errors were encountered with the following items:\n#")
        for loc_name, errors_by_item in items_with_errors.items():
            for item, messages in errors_by_item.items():
                heading = "  [%s] %s" % (loc_name, item)
                print(prefix_lines(heading, "#"))
                for message in messages:
                    print(prefix_lines(message, "#    "))
            print("#")

    for m in movies:
        fields = [ m["name"] ]

        if params["print_location"]:
            fields.append("[%s]" % m["location name"])

        if isinstance(m, MovieFile):
            if params["print_dirname"]:
                fields.append(maybe_quoted(m["dirname"]))

            if params["print_fullpath"]:
                fields.append(
                    maybe_quoted(os.path.join(m["dirname"], m["name"])))
        elif isinstance(m, MovieMetadata):
            disc_info = "(disc %d" % m["disc_number"]

            if (m["comments"] != "") and params["print_comments"]:
                disc_info += " - %s" % m["comments"]

            fields.append(disc_info + ")")
        else:
            assert False, "unexpected class: {!r}".format(m)

        print(' '.join(fields))


def sanitize_locations(locs):
    """Sanitize a mapping describing locations.

    This involves tilde expansion in local paths and compilation of
    (regexp, regexp_options) tuples.

    For every location that is not remote, tilde expansion is performed
    on the elements of "directories" and "files" that are strings, and
    on the "base dir" of the elements of "directories" that are
    dictionaries. The elements of "regexps" that are not already
    compiled regular expressions are passed, unpacked, to re.compile().

    Return a new dictionary containing the processed elements or
    references to unmodified elements from LOCS. LOCS itself and the
    objects it contains are not modified.

    """
    regexp_type = type(re.compile(""))
    new_locs = {}

    for name, loc in locs.items():
        new_loc = new_locs[name] = {}

        for loc_param, loc_value in loc.items():
            if (not loc["remote"]) and loc_param in ("directories", "files"):
                l = []
                for elt in loc_value:
                    if isinstance(elt, str):
                        l.append(os.path.expanduser(elt))
                    elif loc_param == "directories":
                        # elt must be a dict specifying a base dir and a list
                        # of regexps for directories to prune, let's do the
                        # tilde expansion on the base dir.
                        assert isinstance(elt, dict) and "base dir" in elt, elt
                        assert isinstance(elt["base dir"], str), elt["base dir"]
                        # Work on a copy: assigning to elt["base dir"] would
                        # modify the dictionary owned by the caller.
                        new_elt = dict(elt)
                        new_elt["base dir"] = os.path.expanduser(
                            elt["base dir"])
                        l.append(new_elt)
                    # NOTE(review): elements of "files" that are not strings
                    # are silently left out, as before -- confirm that this
                    # is intended.
                new_loc[loc_param] = l
            elif loc_param == "regexps":
                new_loc[loc_param] = [
                    e if isinstance(e, regexp_type) else re.compile(*e)
                    for e in loc_value ]
            else:
                new_loc[loc_param] = loc_value

    return new_locs


def process_command_line_and_config_file():
    """Build the program parameters from the command line and config files.

    Return a tuple (ACTION, VALUE):
      - ("exit", STATUS) when the program should exit right away with
        exit status STATUS (after --help or --version, or after an
        incorrect invocation, in which case the usage message has been
        printed to the standard error stream);
      - ("continue", PARAMS) otherwise, where PARAMS is the dictionary
        of parameters: default values, overridden by the configuration
        file(s), themselves overridden by command line options.

    Raise SeveralConfigFileOptionsSupplied if --config-file was given
    more than once and NoSuchConfigurationFile if the file it
    designates doesn't exist. The program is terminated if no
    'locations' parameter was set by the configuration file(s).

    """
    long_options = ["config-file=",
                    "dont-print-location",
                    "print-dirname",
                    "print-fullpath",
                    "shell-quote",
                    "no-comments",
                    "help",
                    "version"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dfsn", long_options)
    except getopt.GetoptError:
        print(usage, file=sys.stderr)
        return ("exit", 1)

    # Options causing an immediate exit are looked for first, since they
    # don't require the rest of the invocation to be correct.
    for option, value in opts:
        if option == "--help":
            print(usage)
            return ("exit", 0)
        if option == "--version":
            print("%s %s\n%s" % (progname, progversion, version_blurb))
            return ("exit", 0)

    # No non-option argument is accepted.
    if args:
        print(usage, file=sys.stderr)
        return ("exit", 1)

    # The home directory, if any, is kept in the parameters (often useful).
    home_dir = os.environ.get("HOME")

    # Default values
    params = {
        "home_dir": home_dir,
        "print_location": True,
        "print_dirname": False,
        "print_fullpath": False,
        "shell_quote": False,
        "default_regexps": default_movie_regexps,
        "print_comments": True,
        "LC_COLLATE": None,
    }

    # Parameters recognized in the configuration file(s)
    recognized_params = ("locations",
                         "print_location",
                         "print_dirname",
                         "print_fullpath",
                         "shell_quote",
                         "default_regexps",
                         "print_comments",
                         "LC_COLLATE")

    # Was --config-file used? It may be given only once and must designate
    # an existing file.
    cfg_file_specified = None
    for option, value in opts:
        if option != "--config-file":
            continue

        if cfg_file_specified is not None:
            raise SeveralConfigFileOptionsSupplied()

        cfg_file_specified = value
        if not os.path.exists(cfg_file_specified):
            raise NoSuchConfigurationFile(cfg_file_specified)

    if cfg_file_specified is None:
        # Default configuration files: system-wide first, then per-user
        cfg_files = [ "/etc/%s/config.py" % progname ]
        if home_dir is not None:
            cfg_files.append(
                os.path.join(home_dir, ".%s" % progname, "config.py"))
    else:
        cfg_files = [ cfg_file_specified ]

    # Update 'params' with the parameters set in the config files, if any
    flo_small_funcs.import_params_from_python_cfg_files(
        namespace=params, cfg_files=cfg_files,
        recognized_params=recognized_params)

    if "locations" not in params:
        sys.exit("No 'locations' parameter found.")

    params["locations"] = sanitize_locations(params["locations"])

    # Command line options that simply set a parameter to a fixed value;
    # they are processed last in order to override the configuration files.
    flag_settings = {
        "--dont-print-location": ("print_location", False),
        "-d": ("print_dirname", True),
        "--print-dirname": ("print_dirname", True),
        "-f": ("print_fullpath", True),
        "--print-fullpath": ("print_fullpath", True),
        "-s": ("shell_quote", True),
        "--shell-quote": ("shell_quote", True),
        "-n": ("print_comments", False),
        "--no-comments": ("print_comments", False),
    }

    for option, value in opts:
        if option in flag_settings:
            param_name, param_value = flag_settings[option]
            params[param_name] = param_value
        elif option != "--config-file":
            # --config-file was handled earlier, and the options causing
            # an immediate exit (such as --help) made the function return
            # before this point: nothing else is expected here.
            assert False, "Unexpected option received from the " \
                "getopt module: {!r}".format(option)

    return ("continue", params)


def main():
    """Run the program: find the movies, sort them and print the list.

    The function always terminates by raising SystemExit: with status 0
    after a normal run, with the status chosen by
    process_command_line_and_config_file() when it requests an early
    exit, and with status 2 (after printing a traceback to the standard
    error stream) when an exception is caught.

    """
    global params

    try:
        action, params = process_command_line_and_config_file()
        if action == "exit":
            # In this case, 'params' is the exit status
            sys.exit(params)

        locale.setlocale(locale.LC_ALL, '')

        collate = params["LC_COLLATE"]
        if collate is not None:
            locale.setlocale(locale.LC_COLLATE, collate)

        params["preferred encoding"] = locale.getpreferredencoding()

        # The list is sorted by name, in a case-insensitive way, with
        # properly written numeric prefixes removed.
        numeric_prefix_cre = re.compile(
            r"^(\s* (\d+ (\s* [-_—] \s* | \. \s+) )? )", re.VERBOSE)

        def sort_key(movie):
            stripped = numeric_prefix_cre.sub("", movie["name"], count=1)
            return locale.strxfrm(stripped.lower())

        movies, items_with_errors = find_movies()
        movies.sort(key=sort_key)
        format_output(movies, items_with_errors)

        sys.exit(0)

    except Exception as exc_instance:
        print(traceback.format_exc(), file=sys.stderr)
        print("Error (see above for a traceback):\n\n{}".format(exc_instance),
              file=sys.stderr)
        sys.exit(2)

# Only run the program when this file is executed as a script.
if __name__ == "__main__":
    main()
