Source code for texteller.api.katex

import re

from ..utils.latex import change_all
from .format import format_latex


def _rm_dollar_surr(content):
    pattern = re.compile(r"\\[a-zA-Z]+\$.*?\$|\$.*?\$")
    matches = pattern.findall(content)

    for match in matches:
        if not re.match(r"\\[a-zA-Z]+", match):
            new_match = match.strip("$")
            content = content.replace(match, " " + new_match + " ")

    return content


def to_katex(formula: str) -> str:
    """
    Convert LaTeX formula to KaTeX-compatible format.

    The formula is passed through a fixed sequence of rewrites: box-like
    wrappers (``\\mbox``, ``\\hbox``, ``\\makebox``, ``\\raisebox``,
    ``\\scalebox``, ``\\vbox``) and ``\\raise`` offsets are removed, font-size
    and bold/emphasis commands are normalised, ``\\[...\\]`` groups become
    ``\\newline``-terminated runs, spacing commands collapse to a plain space,
    adjacent ``\\text{...}`` groups are merged, bare ``$...$`` delimiters are
    stripped, and the result is handed to ``format_latex``.

    Args:
        formula: LaTeX formula string to convert

    Returns:
        KaTeX-compatible formula string
    """
    tex = formula

    # NOTE(review): ``change_all`` is defined elsewhere; it presumably rewrites
    # each ``<old cmd><old left>...<old right>`` occurrence into
    # ``<new cmd><new left>...<new right>`` -- confirm against utils.latex.

    # \mbox: the variant with a trailing space is handled first.
    for mbox_cmd in (r"\mbox ", r"\mbox"):
        tex = change_all(tex, mbox_cmd, r" ", r"{", r"}", r"", r"")

    # \hbox: drop an explicit "to <decimal>[pt]" width, then the box itself.
    tex = re.sub(r"\\hbox to ?-? ?\d+\.\d+(pt)?\{", r"\\hbox{", tex)
    tex = change_all(tex, r"\hbox", r" ", r"{", r"}", r"", r" ")

    # \raise <decimal>[pt] becomes a single space.
    tex = re.sub(r"\\raise ?-? ?\d+\.\d+(pt)?", r" ", tex)

    # \makebox: drop the optional [<decimal>[pt]] argument, then the box.
    tex = re.sub(r"\\makebox ?\[\d+\.\d+(pt)?\]\{", r"\\makebox{", tex)
    tex = change_all(tex, r"\makebox", r" ", r"{", r"}", r"", r" ")

    # \raisebox / \scalebox: drop the leading {<decimal>[pt]} argument first,
    # then remove \scalebox, \raisebox and \vbox in that order.
    tex = re.sub(r"\\raisebox\{-? ?\d+\.\d+(pt)?\}\{", r"\\raisebox{", tex)
    tex = re.sub(r"\\scalebox\{-? ?\d+\.\d+(pt)?\}\{", r"\\scalebox{", tex)
    for box_cmd in (r"\scalebox", r"\raisebox", r"\vbox"):
        tex = change_all(tex, box_cmd, r" ", r"{", r"}", r"", r" ")

    # Font-size switches keep their name; only the surrounding $...$ is
    # swapped for {...}.
    size_commands = [
        r"\Huge",
        r"\huge",
        r"\LARGE",
        r"\Large",
        r"\large",
        r"\normalsize",
        r"\small",
        r"\footnotesize",
        r"\tiny",
    ]
    for size_cmd in size_commands:
        tex = change_all(tex, size_cmd, size_cmd, r"$", r"$", "{", "}")

    # Bold variants are first funnelled into \bm{...} ...
    tex = change_all(tex, r"\mathbf", r"\bm", r"{", r"}", r"{", r"}")
    for left, right in ((r"{", r"}"), (r"$", r"$")):
        for bold_cmd in (r"\boldmath ", r"\boldmath"):
            tex = change_all(tex, bold_cmd, r"\bm", left, right, r"{", r"}")

    tex = change_all(tex, r"\scriptsize", r"\scriptsize", r"$", r"$", r"{", r"}")

    for emph_cmd in (r"\emph", r"\emph "):
        tex = change_all(tex, emph_cmd, r"\textit", r"{", r"}", r"{", r"}")

    # ... and then \bm itself is removed.
    tex = change_all(tex, r"\bm", r" ", r"{", r"}", r"", r"")

    # Delimiter sizing commands keep their name but lose a braced argument.
    delimiter_commands = [
        r"\left",
        r"\middle",
        r"\right",
        r"\big",
        r"\Big",
        r"\bigg",
        r"\Bigg",
        r"\bigl",
        r"\Bigl",
        r"\biggl",
        r"\Biggl",
        r"\bigm",
        r"\Bigm",
        r"\biggm",
        r"\Biggm",
        r"\bigr",
        r"\Bigr",
        r"\biggr",
        r"\Biggr",
    ]
    for delim_cmd in delimiter_commands:
        tex = change_all(tex, delim_cmd, delim_cmd, r"{", r"}", r"", r"")

    # \[ ... \] -> "...\newline"; a \newline left at the very end is dropped.
    tex = re.sub(r"\\\[(.*?)\\\]", r"\1\\newline", tex)
    if tex.endswith(r"\newline"):
        tex = tex[: -len(r"\newline")]

    # Runs of \, \! \; \: each collapse to one plain space.
    for spacing_pattern in (
        r"(\\,){1,}",
        r"(\\!){1,}",
        r"(\\;){1,}",
        r"(\\:){1,}",
    ):
        tex = re.sub(spacing_pattern, " ", tex)

    tex = re.sub(r"\\vspace\{.*?}", "", tex)

    def _join_text_run(run):
        # Concatenate the bodies of every \text{...} in the matched run.
        bodies = re.findall(r"\\text\{([^}]*)\}", run.group(0))
        merged_content = "".join(bodies)
        return f"\\text{{{merged_content}}}"

    # Two or more consecutive \text{...} groups become a single one.
    tex = re.sub(r"(\\text\{[^}]*\}\s*){2,}", _join_text_run, tex)

    tex = tex.replace(r"\bf ", "")
    tex = _rm_dollar_surr(tex)

    # Squeeze runs of spaces to one, trim the ends, then format.
    tex = re.sub(r" +", " ", tex)
    return format_latex(tex.strip())