510 lines
16 KiB
Python
510 lines
16 KiB
Python
"""Some miscellaneous utility functions."""
|
|
|
|
# Authors: The MNE-Python contributors.
|
|
# License: BSD-3-Clause
|
|
# Copyright the MNE-Python contributors.
|
|
|
|
import fnmatch
|
|
import gc
|
|
import hashlib
|
|
import inspect
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import traceback
|
|
import weakref
|
|
from contextlib import ExitStack, contextmanager
|
|
from importlib.resources import files
|
|
from math import log
|
|
from queue import Empty, Queue
|
|
from string import Formatter
|
|
from textwrap import dedent
|
|
from threading import Thread
|
|
|
|
import numpy as np
|
|
from decorator import FunctionMaker
|
|
|
|
from ._logging import logger, verbose, warn
|
|
from .check import _check_option, _validate_type
|
|
|
|
|
|
def _identity_function(x):
    """Return the argument unchanged."""
    return x
|
|
|
|
|
|
# TODO: no longer needed when py3.9 is minimum supported version
|
|
def _empty_hash(kind="md5"):
    """Create an empty hash object of the given kind from hashlib.

    ``usedforsecurity=False`` is passed only when the constructor's signature
    advertises that keyword.
    """
    constructor = getattr(hashlib, kind)
    accepted = inspect.signature(constructor).parameters
    extra = {"usedforsecurity": False} if "usedforsecurity" in accepted else {}
    return constructor(**extra)
|
|
|
|
|
|
def _pl(x, non_pl="", pl="s"):
    """Pick the singular or plural suffix for a count or a sized object."""
    if isinstance(x, (int, np.generic)):
        count = x
    else:
        count = len(x)
    if count == 1:
        return non_pl
    return pl
|
|
|
|
|
|
def _explain_exception(start=-1, stop=None, prefix="> "):
    """Format the exception currently being handled as prefixed text lines."""
    exc_type, exc_value, exc_tb = sys.exc_info()
    # start=-1 keeps only the most recent traceback entry
    entries = traceback.extract_tb(exc_tb)[start:stop]
    lines = "".join(traceback.format_list(entries)).split("\n")
    lines = lines + traceback.format_exception_only(exc_type, exc_value)
    separator = "\n" + prefix
    return ":\n" + prefix + separator.join(lines)
|
|
|
|
|
|
def _sort_keys(x):
    """Return the keys of a dict, ordered by their ``str()`` representation."""
    keys = list(x.keys())  # note: not thread-safe
    as_text = list(map(str, keys))
    return [keys[pos] for pos in np.argsort(as_text)]
|
|
|
|
|
|
class _DefaultEventParser:
    """Parse non-standard events.

    Each previously unseen description is assigned the integer
    ``offset + number of descriptions already known``; a description that
    was seen before keeps the integer it was first given.
    """

    def __init__(self):
        # description -> integer event id
        self.event_ids = {}

    def __call__(self, description, offset=1):
        try:
            return self.event_ids[description]
        except KeyError:
            new_id = offset + len(self.event_ids)
            self.event_ids[description] = new_id
            return new_id
|
|
|
|
|
|
class _FormatDict(dict):
    """Mapping that echoes a missing key back as a ``{key}`` placeholder.

    This lets pformat() leave fields without a value untouched.
    """

    def __missing__(self, key):
        return "".join(("{", key, "}"))
|
|
|
|
|
|
def pformat(temp, **fmt):
    """Format a template string partially.

    Fields for which no keyword is given are left in place as ``{field}``.

    Examples
    --------
    >>> pformat("{a}_{b}", a='x')
    'x_{b}'
    """
    return Formatter().vformat(temp, (), _FormatDict(fmt))
|
|
|
|
|
|
def _enqueue_output(out, queue):
    """Put every line read from a binary stream on a queue until EOF."""
    while True:
        line = out.readline()
        if line == b"":  # readline() returns empty bytes at end of stream
            break
        queue.put(line)
|
|
|
|
|
|
@verbose
def run_subprocess(command, return_code=False, verbose=None, *args, **kwargs):
    """Run command using subprocess.Popen.

    Run command and wait for command to complete. If the return code was zero
    then return, otherwise raise CalledProcessError.
    By default, this will also add stdout= and stderr=subprocess.PIPE
    to the call to Popen to suppress printing to the terminal.

    Parameters
    ----------
    command : list of str | str
        Command to run as subprocess (see subprocess.Popen documentation).
    return_code : bool
        If True, return the return code instead of raising an error if it's
        non-zero.

        .. versionadded:: 0.20
    %(verbose)s
    *args, **kwargs : arguments
        Additional arguments to pass to subprocess.Popen.

    Returns
    -------
    stdout : str
        Stdout returned by the process.
    stderr : str
        Stderr returned by the process.
    code : int
        The return code, only returned if ``return_code == True``.

    Raises
    ------
    subprocess.CalledProcessError
        If the return code is non-zero and ``return_code`` is False.
    """
    all_out = ""
    all_err = ""
    # non-blocking adapted from https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python#4896288  # noqa: E501
    out_q = Queue()
    err_q = Queue()
    # Output is only captured for streams the caller did not redirect; for
    # those, running_subprocess substitutes subprocess.PIPE.
    control_stdout = "stdout" not in kwargs
    control_stderr = "stderr" not in kwargs
    with running_subprocess(command, *args, **kwargs) as p:
        # One daemon reader thread per captured stream feeds lines into the
        # matching queue so the loop below never blocks on a pipe read.
        if control_stdout:
            out_t = Thread(target=_enqueue_output, args=(p.stdout, out_q))
            out_t.daemon = True
            out_t.start()
        if control_stderr:
            err_t = Thread(target=_enqueue_output, args=(p.stderr, err_q))
            err_t.daemon = True
            err_t.start()
        while True:
            # Sample the exit status *before* draining, so that one more
            # drain pass still runs after the process has been seen to exit.
            do_break = p.poll() is not None
            # read all current lines without blocking
            while True:  # process stdout
                try:
                    out = out_q.get(timeout=0.01)
                except Empty:
                    break
                else:
                    out = out.decode("utf-8")
                    log_out = out.removesuffix("\n")
                    logger.info(log_out)
                    all_out += out

            while True:  # process stderr
                try:
                    err = err_q.get(timeout=0.01)
                except Empty:
                    break
                else:
                    err = err.decode("utf-8")
                    err_out = err.removesuffix("\n")

                    # Leave this as logger.warning rather than warn(...) to
                    # mirror the logger.info above for stdout. This function
                    # is basically just a version of subprocess.call, and
                    # shouldn't emit Python warnings due to stderr outputs
                    # (the calling function can check for stderr output and
                    # emit a warning if it wants).
                    logger.warning(err_out)
                    all_err += err

            if do_break:
                break
    output = (all_out, all_err)

    if return_code:
        output = output + (p.returncode,)
    elif p.returncode:
        # Report None for any stream that was redirected by the caller and
        # therefore never captured here.
        stdout = all_out if control_stdout else None
        stderr = all_err if control_stderr else None
        raise subprocess.CalledProcessError(
            p.returncode, command, output=stdout, stderr=stderr
        )

    return output
|
|
|
|
|
|
@contextmanager
def running_subprocess(command, after="wait", verbose=None, *args, **kwargs):
    """Context manager to do something with a command running via Popen.

    Parameters
    ----------
    command : list of str | str
        Command to run as subprocess (see :class:`python:subprocess.Popen`).
    after : str
        Can be:

        - "wait" to use :meth:`~python:subprocess.Popen.wait`
        - "communicate" to use :meth:`~python:subprocess.Popen.communicate`
        - "terminate" to use :meth:`~python:subprocess.Popen.terminate`
        - "kill" to use :meth:`~python:subprocess.Popen.kill`

    %(verbose)s
    *args, **kwargs : arguments
        Additional arguments to pass to subprocess.Popen. If ``stdout`` or
        ``stderr`` is not given, it is set to ``subprocess.PIPE``.

    Returns
    -------
    p : instance of Popen
        The process.
    """
    _validate_type(after, str, "after")
    _check_option("after", after, ["wait", "terminate", "kill", "communicate"])
    # Names of the pipes created here; only these are closed on exit.
    contexts = list()
    for stdxxx in ("stderr", "stdout"):
        if stdxxx not in kwargs:
            kwargs[stdxxx] = subprocess.PIPE
            contexts.append(stdxxx)

    # Check the PATH environment variable. If run_subprocess() is to be called
    # frequently this should be refactored so as to only check the path once.
    # ``env=None`` is a valid Popen argument (inherit the current environment)
    # and a custom environment need not define PATH, so neither may raise here.
    env = kwargs.get("env")
    if env is None:
        env = os.environ
    search_path = env.get("PATH", "")
    if any(p.startswith("~") for p in search_path.split(os.pathsep)):
        warn(
            "Your PATH environment variable contains at least one path "
            'starting with a tilde ("~") character. Such paths are not '
            "interpreted correctly from within Python. It is recommended "
            'that you use "$HOME" instead of "~".'
        )
    if isinstance(command, str):
        command_str = command
    else:
        command = [str(s) for s in command]
        command_str = " ".join(command)
    logger.info(f"Running subprocess: {command_str}")
    try:
        p = subprocess.Popen(command, *args, **kwargs)
    except Exception:
        # Guard the lookups so that an empty command cannot raise IndexError
        # here and mask the original exception from Popen.
        if isinstance(command, str):
            parts = command.split()
            command_name = parts[0] if parts else command
        else:
            command_name = command[0] if command else ""
        logger.error(f"Command not found: {command_name}")
        raise
    try:
        with ExitStack() as stack:
            for context in contexts:
                stack.enter_context(getattr(p, context))
            yield p
    finally:
        # Apply the requested shutdown action, then always reap the process.
        getattr(p, after)()
        p.wait()
|
|
|
|
|
|
def _clean_names(names, remove_whitespace=False, before_dash=True):
    """Remove white-space on topo matching.

    This function handles different naming conventions for old VS new VectorView systems
    (`remove_whitespace`) and removes system specific parts in CTF channel names
    (`before_dash`). A trailing ``"_v"`` is always dropped. If cleaning would
    produce duplicate names, the input ``names`` is returned unchanged.

    Usage
    -----
    # for new VectorView (only inside layout)
    ch_names = _clean_names(epochs.ch_names, remove_whitespace=True)

    # for CTF
    ch_names = _clean_names(epochs.ch_names, before_dash=True)
    """
    cleaned = []
    for raw_name in names:
        name = raw_name.replace(" ", "") if remove_whitespace else raw_name
        if before_dash:
            name = name.partition("-")[0]
        cleaned.append(name.removesuffix("_v"))
    if len(set(cleaned)) == len(names):
        return cleaned
    # this was probably not a VectorView or CTF dataset, and we now broke the
    # dataset by creating duplicates, so let's use the original channel names.
    return names
|
|
|
|
|
|
def _get_argvalues():
    """Return all arguments (except self) and values of read_raw_xxx.

    Returns None when the frame three levels up the call stack does not come
    from a file matching ``*/mne/io/*``.
    """
    # call stack
    # read_raw_xxx -> <decorator-gen-000> -> BaseRaw.__init__ -> _get_argvalues

    # This is equivalent to `frame = inspect.stack(0)[4][0]` but faster
    frame = inspect.currentframe()
    try:
        levels_up = 0
        while levels_up < 3:
            frame = frame.f_back
            levels_up += 1
        if not fnmatch.fnmatch(frame.f_code.co_filename, "*/mne/io/*"):
            return None
        arg_names, _, _, frame_locals = inspect.getargvalues(frame)
    finally:
        # drop the frame reference explicitly
        del frame
    return {name: frame_locals[name] for name in arg_names if name != "self"}
|
|
|
|
|
|
def sizeof_fmt(num):
    """Turn number of bytes into human-readable str.

    Parameters
    ----------
    num : int
        The number of bytes. A negative value is formatted by its magnitude
        and prefixed with a minus sign.

    Returns
    -------
    size : str
        The size in human-readable format.
    """
    units = ("bytes", "kB", "MB", "GB", "TB", "PB")
    decimals = (0, 0, 1, 2, 2, 2)
    if num < 0:
        # Negative input used to fall through every branch and return None.
        return "-" + sizeof_fmt(-num)
    if num == 0:
        return "0 bytes"
    if num == 1:
        return "1 byte"
    if num > 1:
        # Cap at the largest known unit so huge values are reported in PB.
        exponent = min(int(log(num, 1024)), len(units) - 1)
    else:
        # Fractional input in (0, 1) is reported in bytes instead of
        # returning None.
        exponent = 0
    quotient = float(num) / 1024**exponent
    return f"{quotient:.{decimals[exponent]}f} {units[exponent]}"
|
|
|
|
|
|
def _file_like(obj):
    """Check whether an object has callable ``read`` and ``seek`` attributes."""
    # An alternative would be::
    #
    #   isinstance(obj, (TextIOBase, BufferedIOBase, RawIOBase, IOBase))
    #
    # but this might be more robust to file-like objects not properly
    # inheriting from these classes:
    for method_name in ("read", "seek"):
        if not callable(getattr(obj, method_name, None)):
            return False
    return True
|
|
|
|
|
|
def _fullname(obj):
    """Return the qualified class name of an object.

    The module prefix is omitted for classes defined in ``builtins``.
    """
    klass = obj.__class__
    module = klass.__module__
    if module != "builtins":
        return ".".join((module, klass.__qualname__))
    return klass.__qualname__
|
|
|
|
|
|
def _assert_no_instances(cls, when=""):
    """Assert that no instance of ``cls`` is still being referenced.

    After a garbage collection pass, every object tracked by the collector
    that is an instance of ``cls`` is examined. An instance counts against the
    assertion when it has at least one referrer other than the list of tracked
    objects, the ``globals()``/``locals()`` dicts seen from this function, or
    a frame object.

    Parameters
    ----------
    cls : type
        The class to look for.
    when : str
        Free-form label included in the assertion message.

    Raises
    ------
    AssertionError
        If one or more referenced instances are found. The message lists a
        short description of each offending referrer.
    """
    # NOTE(review): presumably the pytest convention for hiding this frame
    # from reported tracebacks -- confirm.
    __tracebackhide__ = True
    n = 0  # number of instances with at least one reportable referrer
    ref = list()  # human-readable descriptions used in the failure message
    gc.collect()
    objs = gc.get_objects()
    for obj in objs:
        try:
            check = isinstance(obj, cls)
        except Exception:  # such as a weakref
            check = False
        if check:
            if cls.__name__ == "Brain":
                ref.append(f'Brain._cleaned = {getattr(obj, "_cleaned", None)}')
            rr = gc.get_referrers(obj)
            count = 0
            for r in rr:
                # Skip the referrers this function itself is responsible for.
                if (
                    r is not objs
                    and r is not globals()
                    and r is not locals()
                    and not inspect.isframe(r)
                ):
                    if isinstance(r, (list, dict, tuple)):
                        # Containers are summarized by their length and by the
                        # types of the objects that refer to them in turn.
                        rep = f"len={len(r)}"
                        r_ = gc.get_referrers(r)
                        types = (_fullname(x) for x in r_)
                        types = "/".join(sorted(set(x for x in types if x is not None)))
                        rep += f", {len(r_)} referrers: {types}"
                        del r_
                    else:
                        rep = repr(r)[:100].replace("\n", " ")
                        # If it's a __closure__, get more information
                        if rep.startswith("<cell at "):
                            try:
                                rep += f" ({repr(r.cell_contents)[:100]})"
                            except Exception:
                                pass
                    name = _fullname(r)
                    ref.append(f"{name}: {rep}")
                    count += 1
                # Local names are deleted explicitly throughout, presumably so
                # they do not themselves keep the inspected objects alive.
                del r
            del rr
            n += count > 0
        del obj
    del objs
    gc.collect()
    assert n == 0, f"\n{n} {cls.__name__} @ {when}:\n" + "\n".join(ref)
|
|
|
|
|
|
def _resource_path(submodule, filename):
    """Return a full system path to a package resource (AKA a file).

    Parameters
    ----------
    submodule : str
        An import-style module or submodule name
        (e.g., "mne.datasets.testing").
    filename : str
        The file whose full path you want.

    Returns
    -------
    path : path-like
        The location of the requested file, as the object produced by
        ``importlib.resources.files(submodule).joinpath(filename)``.
    """
    package_root = files(submodule)
    return package_root.joinpath(filename)
|
|
|
|
|
|
def repr_html(f):
    """Decorate _repr_html_ methods.

    If a _repr_html_ method is decorated with this decorator, the repr in a
    notebook will show HTML or plain text depending on the config value
    MNE_REPR_HTML (by default "true", which will render HTML).

    Parameters
    ----------
    f : function
        The function to decorate.

    Returns
    -------
    wrapper : function
        The decorated function. It keeps the name and docstring of ``f``.
    """
    from functools import wraps

    from ..utils import get_config

    @wraps(f)  # preserve __name__/__doc__ of the decorated method
    def wrapper(*args, **kwargs):
        if get_config("MNE_REPR_HTML", "true").lower() == "false":
            import html

            # Plain-text fallback: the escaped repr() of the instance
            # (first positional argument), with newlines turned into <br/>.
            r = "<pre>" + html.escape(repr(args[0])) + "</pre>"
            return r.replace("\n", "<br/>")
        else:
            return f(*args, **kwargs)

    return wrapper
|
|
|
|
|
|
def _auto_weakref(function):
    """Create weakrefs to self (or other free vars in __closure__) then evaluate.

    When a nested function is defined within an instance method, and the function makes
    use of ``self``, it creates a reference cycle that the Python garbage collector is
    not smart enough to resolve, so the parent object is never GC'd. (The reference to
    ``self`` becomes part of the ``__closure__`` of the nested function).

    This decorator allows the nested function to access ``self`` without increasing the
    reference counter on ``self``, which will prevent the memory leak. If the referent
    is not found (usually because already GC'd) it will short-circuit the decorated
    function and return ``None``.
    """
    # Names of the free variables; they pair up with the closure cells.
    names = function.__code__.co_freevars
    assert len(names) == len(function.__closure__)
    # Free-variable name -> weak reference to the object held in its cell.
    # NOTE(review): weakref.ref() raises TypeError for objects that do not
    # support weak references, so presumably every free variable must be
    # weak-referenceable -- confirm against callers.
    __weakref_values__ = dict()
    evaldict = dict(__weakref_values__=__weakref_values__)
    for name, value in zip(names, function.__closure__):
        __weakref_values__[name] = weakref.ref(value.cell_contents)
    # Recover the source of the function and keep everything from the first
    # line that starts with a space onward, i.e. drop the unindented leading
    # lines left after dedent().
    # NOTE(review): assumes the decorator/def header lines are exactly the
    # ones that do not start with a space -- confirm for multi-line signatures.
    body = dedent(inspect.getsource(function))
    body = body.splitlines()
    for li, line in enumerate(body):
        if line.startswith(" "):
            body = body[li:]
            break
    old_body = "\n".join(body)
    # New source: a fresh header, then for each free variable a prologue that
    # dereferences its weakref and returns early if the referent is gone,
    # followed by the original body.
    body = """\
def %(name)s(%(signature)s):
"""
    for name in names:
        body += f"""
    {name} = __weakref_values__[{repr(name)}]()
    if {name} is None:
        return
"""
    body = body + old_body
    # NOTE(review): the %(name)s / %(signature)s fields are presumably filled
    # in by FunctionMaker.make via %-interpolation, in which case a literal
    # "%" in the wrapped function's source would need escaping -- confirm.
    fm = FunctionMaker(function)
    fun = fm.make(body, evaldict, addsource=True)
    fun.__globals__.update(function.__globals__)
    # The rebuilt function must not close over anything.
    assert fun.__closure__ is None, fun.__closure__
    return fun
|