import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
MultiIndex,
NaT,
Timestamp,
option_context,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import _str_escape
@pytest.fixture
def df():
    """Provide a 2x2 frame: columns "A"/"B", rows "x"/"y"."""
    values = [[0, -0.609], [1, -1.228]]
    return DataFrame(data=values, columns=["A", "B"], index=["x", "y"])
@pytest.fixture
def styler(df):
    """Wrap the ``df`` fixture in a Styler built with ``uuid_len=0``."""
    plain_styler = Styler(df, uuid_len=0)
    return plain_styler
@pytest.fixture
def df_multi():
    """Provide a 4x4 frame of 0..15 with two-level MultiIndex rows and columns."""
    column_labels = MultiIndex.from_product([["A", "B"], ["a", "b"]])
    row_labels = MultiIndex.from_product([["X", "Y"], ["x", "y"]])
    values = np.arange(16).reshape(4, 4)
    return DataFrame(data=values, columns=column_labels, index=row_labels)
@pytest.fixture
def styler_multi(df_multi):
    """Wrap the ``df_multi`` fixture in a Styler built with ``uuid_len=0``."""
    multi_styler = Styler(df_multi, uuid_len=0)
    return multi_styler
def test_display_format(styler):
    # a string formatter must be reflected in the display value of body cells
    ctx = styler.format("{:0.1f}")._translate(True, True)
    body = ctx["body"]

    # Flatten the iteration: ``all`` over a generator of non-empty lists is
    # always True, which would make these assertions vacuous.
    assert all("display_value" in cell for row in body for cell in row)

    # Skip the leading index-label cell of each row, and ignore the sign so a
    # value such as "-0.6" is measured on its digits and decimal point only.
    assert all(
        len(cell["display_value"].lstrip("-")) <= 3
        for row in body
        for cell in row[1:]
    )
    assert len(body[0][1]["display_value"].lstrip("-")) <= 3
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_display_format_index(styler, index, columns):
    # format_index must change the displayed row / column labels, and leave
    # them untouched on the axis it was not applied to
    exp_index = ["x", "y"]
    if index:
        styler.format_index(lambda v: v.upper(), axis=0)  # test callable
        exp_index = ["X", "Y"]

    exp_columns = ["A", "B"]
    if columns:
        styler.format_index("*{}*", axis=1)  # test string
        exp_columns = ["*A*", "*B*"]

    ctx = styler._translate(True, True)

    for r, row in enumerate(ctx["body"]):
        assert row[0]["display_value"] == exp_index[r]

    # ``ctx["head"]`` is a list of header rows; the column labels are the
    # cells of the first row after its leading cell.  Slicing the list of rows
    # (``ctx["head"][1:]``) yields nothing here and would never assert.
    header_cells = ctx["head"][0][1:]
    assert [cell["display_value"] for cell in header_cells] == exp_columns
def test_format_dict(styler):
    # a dict formatter supplies a separate format string per column label
    per_column = {"A": "{:0.1f}", "B": "{0:.2%}"}
    first_row = styler.format(per_column)._translate(True, True)["body"][0]
    assert first_row[1]["display_value"] == "0.0"
    assert first_row[2]["display_value"] == "-60.90%"
def test_format_index_dict(styler):
    # dict formatter given to format_index, with a callable stored under key 0
    styler.format_index({0: lambda v: v.upper()})
    body = styler._translate(True, True)["body"]
    for pos, label in enumerate(("X", "Y")):
        assert body[pos][0]["display_value"] == label
def test_format_string(styler):
    # a single format string is applied to every data cell
    body = styler.format("{:.2f}")._translate(True, True)["body"]
    wanted = {
        (0, 1): "0.00",
        (0, 2): "-0.61",
        (1, 1): "1.00",
        (1, 2): "-1.23",
    }
    for (row, col), text in wanted.items():
        assert body[row][col]["display_value"] == text
def test_format_callable(styler):
    # a callable formatter receives each value and returns its display text
    def sign_label(v):
        return "neg" if v < 0 else "pos"

    body = styler.format(sign_label)._translate(True, True)["body"]
    for row in (0, 1):
        assert body[row][1]["display_value"] == "pos"
        assert body[row][2]["display_value"] == "neg"
def test_format_with_na_rep():
# GH 21527 28358
df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "110.00%"
assert ctx["body"][1][2]["display_value"] == "120.00%"
ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True)
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "120.00%"
def test_format_index_with_na_rep():
df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA])
ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "A"
for i in [2, 3, 4, 5]:
assert ctx["head"][0][i]["display_value"] == "--"
def test_format_non_numeric_na():
# GH 21527 28358
df = DataFrame(
{
"object": [None, np.nan, "foo"],
"datetime": [None, NaT, Timestamp("20120101")],
}
)
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "-"
@pytest.mark.parametrize(
    "func, attr, kwargs",
    [
        ("format", "_display_funcs", {}),
        ("format_index", "_display_funcs_index", {"axis": 0}),
        ("format_index", "_display_funcs_columns", {"axis": 1}),
    ],
)
def test_format_clear(styler, func, attr, kwargs):
    # calling the formatting method without a formatter resets to the default
    method = getattr(styler, func)
    key = (0, 0)

    assert key not in getattr(styler, attr)  # using default
    method("{:.2f}", **kwargs)
    assert key in getattr(styler, attr)  # formatter is specified
    method(**kwargs)
    assert key not in getattr(styler, attr)  # formatter cleared to default
@pytest.mark.parametrize(
    "escape, exp",
    [
        ("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
        (
            "latex",
            '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
            "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
            "\\textbackslash \\space ",
        ),
    ],
)
def test_format_escape_html(escape, exp):
    # ``exp`` is the escaped form of ``chars`` for the given ``escape`` mode
    chars = '<>&"%$#_{}~^\\~ ^ \\ '
    df = DataFrame([[chars]])

    # without escaping the raw characters appear verbatim in the cell
    s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
    assert expected in s.to_html()

    # only the value should be escaped before passing to the formatter
    s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
    assert expected in s.to_html()

    # also test format_index()
    styler = Styler(DataFrame(columns=[chars]), uuid_len=0)
    styler.format_index("&{0}&", escape=None, axis=1)
    assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&"
    styler.format_index("&{0}&", escape=escape, axis=1)
    assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&"
@pytest.mark.parametrize(
    "chars, expected",
    [
        (
            r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
            r"$ \$&%#_{}~^\ $ "
            r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
            r"\textbackslash \space \$",
        ),
        (
            r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
            r"\( &%#_{}~^\ \) "
            r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
            r"\textbackslash \space \textbackslash (",
        ),
        (
            r"$\&%#_{}^\$",
            r"\$\textbackslash \&\%\#\_\{\}\textasciicircum \textbackslash \$",
        ),
        (
            r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
            r"$ \frac{1}{2} $"
            r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )",
        ),
    ],
)
def test_format_escape_latex_math(chars, expected):
    # GH 51903
    # latex-math escape works for each DataFrame cell separately. If we have
    # a combination of dollar signs and brackets, the dollar sign would apply.
    frame = DataFrame([[chars]])
    styled = frame.style.format("{0}", escape="latex-math")
    cell = styled._translate(True, True)["body"][0][1]
    assert cell["display_value"] == expected
def test_format_escape_na_rep():
# tests the na_rep is not escaped
df = DataFrame([['<>&"', None]])
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
ex = 'X&<>&">X | '
expected2 = '& | '
assert ex in s.to_html()
assert expected2 in s.to_html()
# also test for format_index()
df = DataFrame(columns=['<>&"', None])
styler = Styler(df, uuid_len=0)
styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1)
ctx = styler._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "X&<>&">X"
assert ctx["head"][0][2]["display_value"] == "&"
def test_format_escape_floats(styler):
    # test given formatter for number format is not impacted by escape
    html = styler.format("{:.1f}", escape="html").to_html()
    for fragment in (">0.0<", ">1.0<", ">-1.2<", ">-0.6<"):
        assert fragment in html

    # tests precision of floats is not impacted by escape
    html = styler.format(precision=1, escape="html").to_html()
    for fragment in (">0<", ">1<", ">-1.2<", ">-0.6<"):
        assert fragment in html
@pytest.mark.parametrize("formatter", [5, True, [2.0]])
@pytest.mark.parametrize("func", ["format", "format_index"])
def test_format_raises(styler, formatter, func):
    # a formatter that is neither str nor callable is rejected with TypeError
    method = getattr(styler, func)
    with pytest.raises(TypeError, match="expected str or callable"):
        method(formatter)
@pytest.mark.parametrize(
    "precision, expected",
    [
        (1, ["1.0", "2.0", "3.2", "4.6"]),
        (2, ["1.00", "2.01", "3.21", "4.57"]),
        (3, ["1.000", "2.009", "3.212", "4.566"]),
    ],
)
def test_format_with_precision(precision, expected):
    # Issue #13257
    numbers = [1.0, 2.0090, 3.2121, 4.566]
    # the same floats serve as data and as column labels
    styler = Styler(DataFrame([numbers], columns=numbers))
    styler.format(precision=precision)
    styler.format_index(precision=precision, axis=1)

    ctx = styler._translate(True, True)
    body_row, head_row = ctx["body"][0], ctx["head"][0]
    for pos, exp in enumerate(expected, start=1):
        assert body_row[pos]["display_value"] == exp  # format test
        assert head_row[pos]["display_value"] == exp  # format_index test
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "level, expected",
    [
        (0, ["X", "X", "_", "_"]),  # level int
        ("zero", ["X", "X", "_", "_"]),  # level name
        (1, ["_", "_", "X", "X"]),  # other level int
        ("one", ["_", "_", "X", "X"]),  # other level name
        ([0, 1], ["X", "X", "X", "X"]),  # both levels
        ([0, "zero"], ["X", "X", "_", "_"]),  # level int and name simultaneous
        ([0, "one"], ["X", "X", "X", "X"]),  # both levels as int and name
        (["one", "zero"], ["X", "X", "X", "X"]),  # both level names, reversed
    ],
)
def test_format_index_level(axis, level, expected):
    # only the levels selected through ``level`` should be displayed as "X"
    midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"])
    frame = DataFrame([[1, 2], [3, 4]])
    if axis == 0:
        frame.index = midx
    else:
        frame.columns = midx

    styled = frame.style.format_index(lambda v: "X", level=level, axis=axis)
    ctx = styled._translate(True, True)

    pairs = [(lvl, pos) for lvl in range(2) for pos in range(2)]
    if axis == 0:  # compare index
        result = [ctx["body"][pos][lvl]["display_value"] for lvl, pos in pairs]
    else:  # compare columns
        result = [ctx["head"][lvl][pos + 1]["display_value"] for lvl, pos in pairs]

    assert expected == result
def test_format_subset():
df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
ctx = df.style.format(
{"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :]
)._translate(True, True)
expected = "0.1"
raw_11 = "1.123400"
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
assert ctx["body"][0][2]["display_value"] == "12.34%"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][0][2]["display_value"] == "0.123400"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate(
True, True
)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == "1.1"
assert ctx["body"][0][2]["display_value"] == "0.123400"
assert ctx["body"][1][2]["display_value"] == raw_11
@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_thousands(formatter, decimal, precision, func, col):
    # the thousands separator must appear for each numeric type, used both as
    # a data value and as an index label
    samples = (
        1000000.123456789,  # testing float
        1000000,  # testing int
        1 + 1000000.123456789j,  # testing complex
    )
    for value in samples:
        styler = DataFrame([[value]], index=[value]).style
        apply_format = getattr(styler, func)
        result = apply_format(
            thousands="_", formatter=formatter, decimal=decimal, precision=precision
        )._translate(True, True)
        assert "1_000_000" in result["body"][0][col]["display_value"]
@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_decimal(formatter, thousands, precision, func, col):
    # the decimal separator must appear for each non-integer numeric type,
    # used both as a data value and as an index label
    samples = (
        1000000.123456789,  # testing float
        1 + 1000000.123456789j,  # testing complex
    )
    for value in samples:
        styler = DataFrame([[value]], index=[value]).style
        apply_format = getattr(styler, func)
        result = apply_format(
            decimal="_", formatter=formatter, thousands=thousands, precision=precision
        )._translate(True, True)
        assert "000_123" in result["body"][0][col]["display_value"]
def test_str_escape_error():
    # an unrecognised ``escape`` argument raises when the value is a string
    msg = "`escape` only permitted in {'html', 'latex', 'latex-math'}, got "
    for bad_mode in ("bad_escape", []):
        with pytest.raises(ValueError, match=msg):
            _str_escape("text", bad_mode)

    _str_escape(2.00, "bad_escape")  # OK since dtype is float
def test_long_int_formatting():
df = DataFrame(data=[[1234567890123456789]], columns=["test"])
styler = df.style
ctx = styler._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "1234567890123456789"
styler = df.style.format(thousands="_")
ctx = styler._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "1_234_567_890_123_456_789"
def test_format_options():
df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]})
ctx = df.style._translate(True, True)
# test option: na_rep
assert ctx["body"][1][2]["display_value"] == "nan"
with option_context("styler.format.na_rep", "MISSING"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][2]["display_value"] == "MISSING"
# test option: decimal and precision
assert ctx["body"][0][2]["display_value"] == "1.009000"
with option_context("styler.format.decimal", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1_009000"
with option_context("styler.format.precision", 2):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1.01"
# test option: thousands
assert ctx["body"][0][1]["display_value"] == "2000"
with option_context("styler.format.thousands", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2_000"
# test option: escape
assert ctx["body"][0][3]["display_value"] == "&<"
assert ctx["body"][1][3]["display_value"] == "&~"
with option_context("styler.format.escape", "html"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][3]["display_value"] == "&<"
with option_context("styler.format.escape", "latex"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
with option_context("styler.format.escape", "latex-math"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
# test option: formatter
with option_context("styler.format.formatter", {"int": "{:,.2f}"}):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00"
def test_precision_zero(df):
    # precision=0 passed to the constructor shows floats without decimals
    body = Styler(df, precision=0)._translate(True, True)["body"]
    for row in (0, 1):
        assert body[row][2]["display_value"] == "-1"
@pytest.mark.parametrize(
    "formatter, exp",
    [
        (lambda x: f"{x:.3f}", "9.000"),
        ("{:.2f}", "9.00"),
        ({0: "{:.1f}"}, "9.0"),
        (None, "9"),
    ],
)
def test_formatter_options_validator(formatter, exp):
    # callable, str, dict and None are all accepted by the formatter option
    frame = DataFrame([[9]])
    with option_context("styler.format.formatter", formatter):
        latex = frame.style.to_latex()
    assert f" {exp} " in latex
def test_formatter_options_raises():
    # a list is not an accepted value for the formatter option
    msg = "Value must be an instance of"
    bad_value = ["bad", "type"]
    with pytest.raises(ValueError, match=msg), option_context(
        "styler.format.formatter", bad_value
    ):
        DataFrame().style.to_latex()
def test_1level_multiindex():
# GH 43383
midx = MultiIndex.from_product([[1, 2]], names=[""])
df = DataFrame(-1, index=midx, columns=[0, 1])
ctx = df.style._translate(True, True)
assert ctx["body"][0][0]["display_value"] == "1"
assert ctx["body"][0][0]["is_visible"] is True
assert ctx["body"][1][0]["display_value"] == "2"
assert ctx["body"][1][0]["is_visible"] is True
def test_boolean_format():
# gh 46384: booleans do not collapse to integer representation on display
df = DataFrame([[True, False]])
ctx = df.style._translate(True, True)
assert ctx["body"][0][1]["display_value"] is True
assert ctx["body"][0][2]["display_value"] is False
@pytest.mark.parametrize(
    "hide, labels",
    [
        (False, [1, 2]),
        (True, [1, 2, 3, 4]),
    ],
)
def test_relabel_raise_length(styler_multi, hide, labels):
    # a ``labels`` list of the wrong length is rejected with ValueError
    if hide:
        hidden_rows = [("X", "x"), ("Y", "y")]
        styler_multi.hide(axis=0, subset=hidden_rows)
    expected_msg = "``labels`` must be of length equal"
    with pytest.raises(ValueError, match=expected_msg):
        styler_multi.relabel_index(labels=labels)
def test_relabel_index(styler_multi):
    # new labels replace the display value while the raw value is kept
    styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")])
    styler_multi.relabel_index(labels=[(1, 2), (3, 4)])
    body = styler_multi._translate(True, True)["body"]

    # (row, cell) -> (raw value, relabelled display value)
    wanted = {
        (0, 0): ("X", 1),
        (0, 1): ("y", 2),
        (1, 0): ("Y", 3),
        (1, 1): ("x", 4),
    }
    for (row, col), (value, display) in wanted.items():
        subset = {"value": value, "display_value": display}
        assert subset.items() <= body[row][col].items()
def test_relabel_columns(styler_multi):
    # same as the index case, but relabelling along axis=1
    styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")])
    styler_multi.relabel_index(axis=1, labels=[(1, 2), (3, 4)])
    head = styler_multi._translate(True, True)["head"]

    # (header row, cell) -> (raw value, relabelled display value)
    wanted = {
        (0, 3): ("A", 1),
        (0, 4): ("B", 3),
        (1, 3): ("b", 2),
        (1, 4): ("a", 4),
    }
    for (row, col), (value, display) in wanted.items():
        subset = {"value": value, "display_value": display}
        assert subset.items() <= head[row][col].items()
def test_relabel_roundtrip(styler):
    # "{}" labels should reproduce the existing index labels
    styler.relabel_index(["{}", "{}"])
    body = styler._translate(True, True)["body"]
    for row, label in enumerate(("x", "y")):
        subset = {"value": label, "display_value": label}
        assert subset.items() <= body[row][0].items()