"""Some miscellaneous utility functions."""
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.
import fnmatch
import gc
import hashlib
import inspect
import os
import subprocess
import sys
import traceback
import weakref
from contextlib import ExitStack, contextmanager
from importlib.resources import files
from math import log
from queue import Empty, Queue
from string import Formatter
from textwrap import dedent
from threading import Thread
import numpy as np
from decorator import FunctionMaker
from ._logging import logger, verbose, warn
from .check import _check_option, _validate_type
def _identity_function(x):
return x
# TODO: no longer needed when py3.9 is minimum supported version
def _empty_hash(kind="md5"):
func = getattr(hashlib, kind)
if "usedforsecurity" in inspect.signature(func).parameters:
return func(usedforsecurity=False)
else:
return func()
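# Illustrative example (comment only, not executed): the object returned by
# _empty_hash is a regular hashlib hash, just constructed with
# usedforsecurity=False when the running Python supports that keyword:
#
#     >>> _empty_hash("sha1").hexdigest() == hashlib.sha1().hexdigest()
#     True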
def _pl(x, non_pl="", pl="s"):
"""Determine if plural should be used."""
len_x = x if isinstance(x, int | np.generic) else len(x)
return non_pl if len_x == 1 else pl
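# Illustrative examples (comments only): _pl accepts an int or anything with a
# length and is typically interpolated into log messages:
#
#     >>> _pl(1), _pl(2), _pl([])
#     ('', 's', 's')
#     >>> f"Removed {n_bads} channel{_pl(n_bads)}"  # n_bads is hypothetical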
def _explain_exception(start=-1, stop=None, prefix="> "):
"""Explain an exception."""
# start=-1 means "only the most recent caller"
etype, value, tb = sys.exc_info()
string = traceback.format_list(traceback.extract_tb(tb)[start:stop])
string = "".join(string).split("\n") + traceback.format_exception_only(etype, value)
string = ":\n" + prefix + ("\n" + prefix).join(string)
return string
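# Illustrative sketch (hedged) of how _explain_exception is meant to be called,
# i.e. from inside an ``except`` block so that sys.exc_info() is populated:
#
#     try:
#         risky_call()  # hypothetical function
#     except Exception:
#         logger.error("Loading failed" + _explain_exception())
#         raise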
def _sort_keys(x):
"""Sort and return keys of dict."""
keys = list(x.keys()) # note: not thread-safe
idx = np.argsort([str(k) for k in keys])
keys = [keys[ii] for ii in idx]
return keys
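# Illustrative example (comments only): keys are sorted by their str() form, so
# mixed-type keys do not raise:
#
#     >>> _sort_keys({"b": 1, 10: 2, "a": 3})
#     [10, 'a', 'b']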
class _DefaultEventParser:
"""Parse none standard events."""
def __init__(self):
self.event_ids = dict()
def __call__(self, description, offset=1):
if description not in self.event_ids:
self.event_ids[description] = offset + len(self.event_ids)
return self.event_ids[description]
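# Illustrative example (comments only): repeated descriptions map to the same
# ID, and new descriptions get consecutive IDs starting at ``offset``:
#
#     >>> parser = _DefaultEventParser()
#     >>> parser("button"), parser("visual"), parser("button")
#     (1, 2, 1)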
class _FormatDict(dict):
"""Help pformat() work properly."""
def __missing__(self, key):
return "{" + key + "}"
def pformat(temp, **fmt):
"""Format a template string partially.
Examples
--------
>>> pformat("{a}_{b}", a='x')
'x_{b}'
"""
formatter = Formatter()
mapping = _FormatDict(fmt)
return formatter.vformat(temp, (), mapping)
def _enqueue_output(out, queue):
for line in iter(out.readline, b""):
queue.put(line)
@verbose
def run_subprocess(command, return_code=False, verbose=None, *args, **kwargs):
"""Run command using subprocess.Popen.
    Run the command and wait for it to complete. If the return code is zero,
    return the output; otherwise raise CalledProcessError.
    By default, this will also add ``stdout=subprocess.PIPE`` and
    ``stderr=subprocess.PIPE`` to the call to Popen to suppress printing to the
    terminal.
Parameters
----------
command : list of str | str
Command to run as subprocess (see subprocess.Popen documentation).
return_code : bool
If True, return the return code instead of raising an error if it's
non-zero.
.. versionadded:: 0.20
%(verbose)s
*args, **kwargs : arguments
Additional arguments to pass to subprocess.Popen.
Returns
-------
stdout : str
Stdout returned by the process.
stderr : str
Stderr returned by the process.
code : int
The return code, only returned if ``return_code == True``.
"""
all_out = ""
all_err = ""
# non-blocking adapted from https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python#4896288 # noqa: E501
out_q = Queue()
err_q = Queue()
control_stdout = "stdout" not in kwargs
control_stderr = "stderr" not in kwargs
with running_subprocess(command, *args, **kwargs) as p:
if control_stdout:
out_t = Thread(target=_enqueue_output, args=(p.stdout, out_q))
out_t.daemon = True
out_t.start()
if control_stderr:
err_t = Thread(target=_enqueue_output, args=(p.stderr, err_q))
err_t.daemon = True
err_t.start()
while True:
do_break = p.poll() is not None
# read all current lines without blocking
while True: # process stdout
try:
out = out_q.get(timeout=0.01)
except Empty:
break
else:
out = out.decode("utf-8")
log_out = out.removesuffix("\n")
logger.info(log_out)
all_out += out
while True: # process stderr
try:
err = err_q.get(timeout=0.01)
except Empty:
break
else:
err = err.decode("utf-8")
err_out = err.removesuffix("\n")
# Leave this as logger.warning rather than warn(...) to
# mirror the logger.info above for stdout. This function
# is basically just a version of subprocess.call, and
# shouldn't emit Python warnings due to stderr outputs
# (the calling function can check for stderr output and
# emit a warning if it wants).
logger.warning(err_out)
all_err += err
if do_break:
break
output = (all_out, all_err)
if return_code:
output = output + (p.returncode,)
elif p.returncode:
stdout = all_out if control_stdout else None
stderr = all_err if control_stderr else None
raise subprocess.CalledProcessError(
p.returncode, command, output=stdout, stderr=stderr
)
return output
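# Illustrative usage sketch (hedged; the commands shown are arbitrary):
#
#     stdout, stderr = run_subprocess(["echo", "hello"])
#     out, err, code = run_subprocess(["ls", "nonexistent"], return_code=True)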
@contextmanager
def running_subprocess(command, after="wait", verbose=None, *args, **kwargs):
"""Context manager to do something with a command running via Popen.
Parameters
----------
command : list of str | str
Command to run as subprocess (see :class:`python:subprocess.Popen`).
after : str
Can be:
- "wait" to use :meth:`~python:subprocess.Popen.wait`
- "communicate" to use :meth:`~python.subprocess.Popen.communicate`
- "terminate" to use :meth:`~python:subprocess.Popen.terminate`
- "kill" to use :meth:`~python:subprocess.Popen.kill`
%(verbose)s
*args, **kwargs : arguments
Additional arguments to pass to subprocess.Popen.
Returns
-------
p : instance of Popen
The process.
"""
_validate_type(after, str, "after")
_check_option("after", after, ["wait", "terminate", "kill", "communicate"])
contexts = list()
for stdxxx in ("stderr", "stdout"):
if stdxxx not in kwargs:
kwargs[stdxxx] = subprocess.PIPE
contexts.append(stdxxx)
# Check the PATH environment variable. If run_subprocess() is to be called
# frequently this should be refactored so as to only check the path once.
env = kwargs.get("env", os.environ)
if any(p.startswith("~") for p in env["PATH"].split(os.pathsep)):
warn(
"Your PATH environment variable contains at least one path "
'starting with a tilde ("~") character. Such paths are not '
"interpreted correctly from within Python. It is recommended "
'that you use "$HOME" instead of "~".'
)
if isinstance(command, str):
command_str = command
else:
command = [str(s) for s in command]
command_str = " ".join(s for s in command)
logger.info(f"Running subprocess: {command_str}")
try:
p = subprocess.Popen(command, *args, **kwargs)
except Exception:
if isinstance(command, str):
command_name = command.split()[0]
else:
command_name = command[0]
logger.error(f"Command not found: {command_name}")
raise
try:
with ExitStack() as stack:
for context in contexts:
stack.enter_context(getattr(p, context))
yield p
finally:
getattr(p, after)()
p.wait()
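# Illustrative usage sketch (hedged; command and interaction are arbitrary):
#
#     with running_subprocess(["sleep", "60"], after="terminate") as proc:
#         ...  # interact with ``proc`` (a subprocess.Popen instance)
#     # on exiting the block, proc.terminate() and then proc.wait() are called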
def _clean_names(names, remove_whitespace=False, before_dash=True):
"""Remove white-space on topo matching.
This function handles different naming conventions for old VS new VectorView systems
(`remove_whitespace`) and removes system specific parts in CTF channel names
(`before_dash`).
Usage
-----
# for new VectorView (only inside layout)
ch_names = _clean_names(epochs.ch_names, remove_whitespace=True)
# for CTF
ch_names = _clean_names(epochs.ch_names, before_dash=True)
"""
cleaned = []
for name in names:
if " " in name and remove_whitespace:
name = name.replace(" ", "")
if "-" in name and before_dash:
name = name.split("-")[0]
if name.endswith("_v"):
name = name[:-2]
cleaned.append(name)
if len(set(cleaned)) != len(names):
# this was probably not a VectorView or CTF dataset, and we now broke the
# dataset by creating duplicates, so let's use the original channel names.
return names
return cleaned
def _get_argvalues():
"""Return all arguments (except self) and values of read_raw_xxx."""
# call stack
# read_raw_xxx -> <decorator-gen-000> -> BaseRaw.__init__ -> _get_argvalues
    # This is equivalent to `frame = inspect.stack(0)[3][0]` but faster
frame = inspect.currentframe()
try:
for _ in range(3):
frame = frame.f_back
fname = frame.f_code.co_filename
if not fnmatch.fnmatch(fname, "*/mne/io/*"):
return None
args, _, _, values = inspect.getargvalues(frame)
finally:
del frame
params = dict()
for arg in args:
params[arg] = values[arg]
params.pop("self", None)
return params
def sizeof_fmt(num):
"""Turn number of bytes into human-readable str.
Parameters
----------
num : int
The number of bytes.
Returns
-------
size : str
The size in human-readable format.
"""
units = ["bytes", "KiB", "MiB", "GiB", "TiB", "PiB"]
decimals = [0, 0, 1, 2, 2, 2]
if num > 1:
exponent = min(int(log(num, 1024)), len(units) - 1)
quotient = float(num) / 1024**exponent
unit = units[exponent]
num_decimals = decimals[exponent]
format_string = f"{{0:.{num_decimals}f}} {{1}}"
return format_string.format(quotient, unit)
if num == 0:
return "0 bytes"
if num == 1:
return "1 byte"
def _file_like(obj):
# An alternative would be::
#
# isinstance(obj, (TextIOBase, BufferedIOBase, RawIOBase, IOBase))
#
# but this might be more robust to file-like objects not properly
# inheriting from these classes:
return all(callable(getattr(obj, name, None)) for name in ("read", "seek"))
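# Illustrative examples (comments only): objects exposing callable read() and
# seek() count as file-like, plain path strings do not:
#
#     >>> from io import BytesIO
#     >>> _file_like(BytesIO(b"abc")), _file_like("data.fif")
#     (True, False)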
def _fullname(obj):
klass = obj.__class__
module = klass.__module__
if module == "builtins":
return klass.__qualname__
return module + "." + klass.__qualname__
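# Illustrative examples (comments only):
#
#     >>> _fullname(np.zeros(3)), _fullname(dict())
#     ('numpy.ndarray', 'dict')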
def _assert_no_instances(cls, when=""):
__tracebackhide__ = True
n = 0
ref = list()
gc.collect()
objs = gc.get_objects()
for obj in objs:
try:
check = isinstance(obj, cls)
except Exception: # such as a weakref
check = False
if check:
if cls.__name__ == "Brain":
ref.append(f"Brain._cleaned = {getattr(obj, '_cleaned', None)}")
rr = gc.get_referrers(obj)
count = 0
for r in rr:
if (
r is not objs
and r is not globals()
and r is not locals()
and not inspect.isframe(r)
):
if isinstance(r, list | dict | tuple):
rep = f"len={len(r)}"
r_ = gc.get_referrers(r)
types = (_fullname(x) for x in r_)
types = "/".join(sorted(set(x for x in types if x is not None)))
rep += f", {len(r_)} referrers: {types}"
del r_
else:
rep = repr(r)[:100].replace("\n", " ")
# If it's a __closure__, get more information
if rep.startswith("<cell at "):
try:
rep += f" ({repr(r.cell_contents)[:100]})"
except Exception:
pass
name = _fullname(r)
ref.append(f"{name}: {rep}")
count += 1
del r
del rr
n += count > 0
del obj
del objs
gc.collect()
assert n == 0, f"\n{n} {cls.__name__} @ {when}:\n" + "\n".join(ref)
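# Illustrative usage sketch (hedged; SomeClass is a hypothetical class under
# test). Typically called from tests to catch lingering references after an
# object should have been garbage collected:
#
#     obj = SomeClass()
#     del obj
#     _assert_no_instances(SomeClass, when="after deleting obj")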
def _resource_path(submodule, filename):
"""Return a full system path to a package resource (AKA a file).
Parameters
----------
submodule : str
An import-style module or submodule name
(e.g., "mne.datasets.testing").
filename : str
The file whose full path you want.
Returns
-------
    path : path-like
        The full system path to the requested file.
"""
return files(submodule).joinpath(filename)
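# Illustrative usage sketch (hedged; the submodule/filename pair is arbitrary):
#
#     init_path = _resource_path("mne.datasets.testing", "__init__.py")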
def repr_html(f):
"""Decorate _repr_html_ methods.
If a _repr_html_ method is decorated with this decorator, the repr in a
notebook will show HTML or plain text depending on the config value
MNE_REPR_HTML (by default "true", which will render HTML).
Parameters
----------
f : function
The function to decorate.
Returns
-------
wrapper : function
The decorated function.
"""
from ..utils import get_config
def wrapper(*args, **kwargs):
if get_config("MNE_REPR_HTML", "true").lower() == "false":
import html
r = "<pre>" + html.escape(repr(args[0])) + "</pre>"
return r.replace("\n", "<br/>")
else:
return f(*args, **kwargs)
return wrapper
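# Illustrative usage sketch (hedged; ``MyObject`` is a stand-in class). When the
# MNE_REPR_HTML config value is "false", the decorated method returns an escaped
# <pre> block built from repr(self) instead of the HTML representation:
#
#     class MyObject:
#         @repr_html
#         def _repr_html_(self):
#             return "<table>...</table>"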
def _auto_weakref(function):
"""Create weakrefs to self (or other free vars in __closure__) then evaluate.
When a nested function is defined within an instance method, and the function makes
use of ``self``, it creates a reference cycle that the Python garbage collector is
not smart enough to resolve, so the parent object is never GC'd. (The reference to
``self`` becomes part of the ``__closure__`` of the nested function).
    This decorator lets the nested function access ``self`` without increasing the
    reference count on ``self``, preventing the memory leak. If a referent is gone
    (usually because it has already been GC'd), the decorated function
    short-circuits and returns ``None``.
"""
names = function.__code__.co_freevars
assert len(names) == len(function.__closure__)
__weakref_values__ = dict()
evaldict = dict(__weakref_values__=__weakref_values__)
for name, value in zip(names, function.__closure__):
__weakref_values__[name] = weakref.ref(value.cell_contents)
body = dedent(inspect.getsource(function))
body = body.splitlines()
for li, line in enumerate(body):
if line.startswith(" "):
body = body[li:]
break
old_body = "\n".join(body)
body = """\
def %(name)s(%(signature)s):
"""
for name in names:
body += f"""
{name} = __weakref_values__[{repr(name)}]()
if {name} is None:
return
"""
body = body + old_body
fm = FunctionMaker(function)
fun = fm.make(body, evaldict, addsource=True)
fun.__globals__.update(function.__globals__)
assert fun.__closure__ is None, fun.__closure__
return fun
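# Illustrative usage sketch (hedged; Widget, backend, and register are all
# hypothetical). Because the callback only holds a weakref to ``self``,
# registering it with a long-lived backend does not keep the Widget alive:
#
#     class Widget:
#         def connect(self, backend):
#             @_auto_weakref
#             def _on_event(value):
#                 self._last_value = value
#             backend.register(_on_event)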