""" Printing tools. """ from __future__ import annotations from collections.abc import ( Iterable, Mapping, Sequence, ) import sys from typing import ( Any, Callable, TypeVar, Union, ) from unicodedata import east_asian_width from pandas._config import get_option from pandas.core.dtypes.inference import is_sequence from pandas.io.formats.console import get_console_size EscapeChars = Union[Mapping[str, str], Iterable[str]] _KT = TypeVar("_KT") _VT = TypeVar("_VT") def adjoin(space: int, *lists: list[str], **kwargs) -> str: """ Glues together two sets of strings using the amount of space requested. The idea is to prettify. ---------- space : int number of spaces for padding lists : str list of str which being joined strlen : callable function used to calculate the length of each str. Needed for unicode handling. justfunc : callable function used to justify str. Needed for unicode handling. """ strlen = kwargs.pop("strlen", len) justfunc = kwargs.pop("justfunc", _adj_justify) newLists = [] lengths = [max(map(strlen, x)) + space for x in lists[:-1]] # not the last one lengths.append(max(map(len, lists[-1]))) maxLen = max(map(len, lists)) for i, lst in enumerate(lists): nl = justfunc(lst, lengths[i], mode="left") nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl newLists.append(nl) toJoin = zip(*newLists) return "\n".join("".join(lines) for lines in toJoin) def _adj_justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]: """ Perform ljust, center, rjust against string or list-like """ if mode == "left": return [x.ljust(max_len) for x in texts] elif mode == "center": return [x.center(max_len) for x in texts] else: return [x.rjust(max_len) for x in texts] # Unicode consolidation # --------------------- # # pprinting utility functions for generating Unicode text or # bytes(3.x)/str(2.x) representations of objects. # Try to use these as much as possible rather than rolling your own. # # When to use # ----------- # # 1) If you're writing code internal to pandas (no I/O directly involved), # use pprint_thing(). # # It will always return unicode text which can handled by other # parts of the package without breakage. # # 2) if you need to write something out to file, use # pprint_thing_encoded(encoding). # # If no encoding is specified, it defaults to utf-8. Since encoding pure # ascii with utf-8 is a no-op you can safely use the default utf-8 if you're # working with straight ascii. def _pprint_seq( seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds ) -> str: """ internal. pprinter for iterables. you should probably use pprint_thing() rather than calling this directly. bounds length of printed sequence, depending on options """ if isinstance(seq, set): fmt = "{{{body}}}" else: fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})" if max_seq_items is False: nitems = len(seq) else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) s = iter(seq) # handle sets, no slicing r = [ pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) for i in range(min(nitems, len(seq))) ] body = ", ".join(r) if nitems < len(seq): body += ", ..." elif isinstance(seq, tuple) and len(seq) == 1: body += "," return fmt.format(body=body) def _pprint_dict( seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds ) -> str: """ internal. pprinter for iterables. you should probably use pprint_thing() rather than calling this directly. """ fmt = "{{{things}}}" pairs = [] pfmt = "{key}: {val}" if max_seq_items is False: nitems = len(seq) else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) for k, v in list(seq.items())[:nitems]: pairs.append( pfmt.format( key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), ) ) if nitems < len(seq): return fmt.format(things=", ".join(pairs) + ", ...") else: return fmt.format(things=", ".join(pairs)) def pprint_thing( thing: Any, _nest_lvl: int = 0, escape_chars: EscapeChars | None = None, default_escapes: bool = False, quote_strings: bool = False, max_seq_items: int | None = None, ) -> str: """ This function is the sanctioned way of converting objects to a string representation and properly handles nested sequences. Parameters ---------- thing : anything to be formatted _nest_lvl : internal use only. pprint_thing() is mutually-recursive with pprint_sequence, this argument is used to keep track of the current nesting level, and limit it. escape_chars : list or dict, optional Characters to escape. If a dict is passed the values are the replacements default_escapes : bool, default False Whether the input escape characters replaces or adds to the defaults max_seq_items : int or None, default None Pass through to other pretty printers to limit sequence printing Returns ------- str """ def as_escaped_string( thing: Any, escape_chars: EscapeChars | None = escape_chars ) -> str: translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} if isinstance(escape_chars, dict): if default_escapes: translate.update(escape_chars) else: translate = escape_chars escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or () result = str(thing) for c in escape_chars: result = result.replace(c, translate[c]) return result if hasattr(thing, "__next__"): return str(thing) elif isinstance(thing, dict) and _nest_lvl < get_option( "display.pprint_nest_depth" ): result = _pprint_dict( thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items ) elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"): result = _pprint_seq( thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, max_seq_items=max_seq_items, ) elif isinstance(thing, str) and quote_strings: result = f"'{as_escaped_string(thing)}'" else: result = as_escaped_string(thing) return result def pprint_thing_encoded( object, encoding: str = "utf-8", errors: str = "replace" ) -> bytes: value = pprint_thing(object) # get unicode representation of object return value.encode(encoding, errors) def enable_data_resource_formatter(enable: bool) -> None: if "IPython" not in sys.modules: # definitely not in IPython return from IPython import get_ipython ip = get_ipython() if ip is None: # still not in IPython return formatters = ip.display_formatter.formatters mimetype = "application/vnd.dataresource+json" if enable: if mimetype not in formatters: # define tableschema formatter from IPython.core.formatters import BaseFormatter from traitlets import ObjectName class TableSchemaFormatter(BaseFormatter): print_method = ObjectName("_repr_data_resource_") _return_type = (dict,) # register it: formatters[mimetype] = TableSchemaFormatter() # enable it if it's been disabled: formatters[mimetype].enabled = True # unregister tableschema mime-type elif mimetype in formatters: formatters[mimetype].enabled = False def default_pprint(thing: Any, max_seq_items: int | None = None) -> str: return pprint_thing( thing, escape_chars=("\t", "\r", "\n"), quote_strings=True, max_seq_items=max_seq_items, ) def format_object_summary( obj, formatter: Callable, is_justify: bool = True, name: str | None = None, indent_for_name: bool = True, line_break_each_value: bool = False, ) -> str: """ Return the formatted obj as a unicode string Parameters ---------- obj : object must be iterable and support __getitem__ formatter : callable string formatter for an element is_justify : bool should justify the display name : name, optional defaults to the class name of the obj indent_for_name : bool, default True Whether subsequent lines should be indented to align with the name. line_break_each_value : bool, default False If True, inserts a line break for each value of ``obj``. If False, only break lines when the a line of values gets wider than the display width. Returns ------- summary string """ display_width, _ = get_console_size() if display_width is None: display_width = get_option("display.width") or 80 if name is None: name = type(obj).__name__ if indent_for_name: name_len = len(name) space1 = f'\n{(" " * (name_len + 1))}' space2 = f'\n{(" " * (name_len + 2))}' else: space1 = "\n" space2 = "\n " # space for the opening '[' n = len(obj) if line_break_each_value: # If we want to vertically align on each value of obj, we need to # separate values by a line break and indent the values sep = ",\n " + " " * len(name) else: sep = "," max_seq_items = get_option("display.max_seq_items") or n # are we a truncated display is_truncated = n > max_seq_items # adj can optionally handle unicode eastern asian width adj = get_adjustment() def _extend_line( s: str, line: str, value: str, display_width: int, next_line_prefix: str ) -> tuple[str, str]: if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width: s += line.rstrip() line = next_line_prefix line += value return s, line def best_len(values: list[str]) -> int: if values: return max(adj.len(x) for x in values) else: return 0 close = ", " if n == 0: summary = f"[]{close}" elif n == 1 and not line_break_each_value: first = formatter(obj[0]) summary = f"[{first}]{close}" elif n == 2 and not line_break_each_value: first = formatter(obj[0]) last = formatter(obj[-1]) summary = f"[{first}, {last}]{close}" else: if max_seq_items == 1: # If max_seq_items=1 show only last element head = [] tail = [formatter(x) for x in obj[-1:]] elif n > max_seq_items: n = min(max_seq_items // 2, 10) head = [formatter(x) for x in obj[:n]] tail = [formatter(x) for x in obj[-n:]] else: head = [] tail = [formatter(x) for x in obj] # adjust all values to max length if needed if is_justify: if line_break_each_value: # Justify each string in the values of head and tail, so the # strings will right align when head and tail are stacked # vertically. head, tail = _justify(head, tail) elif is_truncated or not ( len(", ".join(head)) < display_width and len(", ".join(tail)) < display_width ): # Each string in head and tail should align with each other max_length = max(best_len(head), best_len(tail)) head = [x.rjust(max_length) for x in head] tail = [x.rjust(max_length) for x in tail] # If we are not truncated and we are only a single # line, then don't justify if line_break_each_value: # Now head and tail are of type List[Tuple[str]]. Below we # convert them into List[str], so there will be one string per # value. Also truncate items horizontally if wider than # max_space max_space = display_width - len(space2) value = tail[0] max_items = 1 for num_items in reversed(range(1, len(value) + 1)): pprinted_seq = _pprint_seq(value, max_seq_items=num_items) if len(pprinted_seq) < max_space: max_items = num_items break head = [_pprint_seq(x, max_seq_items=max_items) for x in head] tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] summary = "" line = space2 for head_value in head: word = head_value + sep + " " summary, line = _extend_line(summary, line, word, display_width, space2) if is_truncated: # remove trailing space of last line summary += line.rstrip() + space2 + "..." line = space2 for tail_item in tail[:-1]: word = tail_item + sep + " " summary, line = _extend_line(summary, line, word, display_width, space2) # last value: no sep added + 1 space of width used for trailing ',' summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2) summary += line # right now close is either '' or ', ' # Now we want to include the ']', but not the maybe space. close = "]" + close.rstrip(" ") summary += close if len(summary) > (display_width) or line_break_each_value: summary += space1 else: # one row summary += " " # remove initial space summary = "[" + summary[len(space2) :] return summary def _justify( head: list[Sequence[str]], tail: list[Sequence[str]] ) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]: """ Justify items in head and tail, so they are right-aligned when stacked. Parameters ---------- head : list-like of list-likes of strings tail : list-like of list-likes of strings Returns ------- tuple of list of tuples of strings Same as head and tail, but items are right aligned when stacked vertically. Examples -------- >>> _justify([['a', 'b']], [['abc', 'abcd']]) ([(' a', ' b')], [('abc', 'abcd')]) """ combined = head + tail # For each position for the sequences in ``combined``, # find the length of the largest string. max_length = [0] * len(combined[0]) for inner_seq in combined: length = [len(item) for item in inner_seq] max_length = [max(x, y) for x, y in zip(max_length, length)] # justify each item in each list-like in head and tail using max_length head_tuples = [ tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head ] tail_tuples = [ tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail ] return head_tuples, tail_tuples class PrettyDict(dict[_KT, _VT]): """Dict extension to support abbreviated __repr__""" def __repr__(self) -> str: return pprint_thing(self) class _TextAdjustment: def __init__(self) -> None: self.encoding = get_option("display.encoding") def len(self, text: str) -> int: return len(text) def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]: """ Perform ljust, center, rjust against string or list-like """ if mode == "left": return [x.ljust(max_len) for x in texts] elif mode == "center": return [x.center(max_len) for x in texts] else: return [x.rjust(max_len) for x in texts] def adjoin(self, space: int, *lists, **kwargs) -> str: return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs) class _EastAsianTextAdjustment(_TextAdjustment): def __init__(self) -> None: super().__init__() if get_option("display.unicode.ambiguous_as_wide"): self.ambiguous_width = 2 else: self.ambiguous_width = 1 # Definition of East Asian Width # https://unicode.org/reports/tr11/ # Ambiguous width can be changed by option self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1} def len(self, text: str) -> int: """ Calculate display width considering unicode East Asian Width """ if not isinstance(text, str): return len(text) return sum( self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text ) def justify( self, texts: Iterable[str], max_len: int, mode: str = "right" ) -> list[str]: # re-calculate padding space per str considering East Asian Width def _get_pad(t): return max_len - self.len(t) + len(t) if mode == "left": return [x.ljust(_get_pad(x)) for x in texts] elif mode == "center": return [x.center(_get_pad(x)) for x in texts] else: return [x.rjust(_get_pad(x)) for x in texts] def get_adjustment() -> _TextAdjustment: use_east_asian_width = get_option("display.unicode.east_asian_width") if use_east_asian_width: return _EastAsianTextAdjustment() else: return _TextAdjustment()