Source code for texteller.api.detection.detect

from typing import List

from onnxruntime import InferenceSession

from texteller.types import Bbox

from .preprocess import Compose

_config = {
    "mode": "paddle",
    "draw_threshold": 0.5,
    "metric": "COCO",
    "use_dynamic_shape": False,
    "arch": "DETR",
    "min_subgraph_size": 3,
    "preprocess": [
        {"interp": 2, "keep_ratio": False, "target_size": [1600, 1600], "type": "Resize"},
        {
            "mean": [0.0, 0.0, 0.0],
            "norm_type": "none",
            "std": [1.0, 1.0, 1.0],
            "type": "NormalizeImage",
        },
        {"type": "Permute"},
    ],
    "label_list": ["isolated", "embedding"],
}


def latex_detect(img_path: str, predictor: InferenceSession) -> List[Bbox]:
    """
    Detect LaTeX formulas in an image and classify them as isolated or embedded.

    This function uses an ONNX model to detect LaTeX formulas in images. The model
    identifies two types of LaTeX formulas:

    - 'isolated': Standalone LaTeX formulas (typically displayed equations)
    - 'embedding': Inline LaTeX formulas embedded within text

    Each row of the model's first output is read as
    ``[class_id, score, xmin, ymin, xmax, ymax]``. A row is kept only when its
    score is strictly greater than ``_config["draw_threshold"]`` and its class id
    is non-negative.

    Args:
        img_path: Path to the input image file
        predictor: ONNX InferenceSession model for LaTeX detection

    Returns:
        List of Bbox objects representing the detected LaTeX formulas with their
        positions, classifications, and confidence scores

    Raises:
        IndexError: If a kept detection reports a class id that is beyond the
            end of ``_config["label_list"]``.

    Example:
        >>> from texteller.api import load_latexdet_model, latex_detect
        >>> model = load_latexdet_model()
        >>> bboxes = latex_detect("path/to/image.png", model)
    """
    transforms = Compose(_config["preprocess"])
    inputs = transforms(img_path)

    # Feed only the tensors the model declares. Indexing with ``None`` prepends
    # a batch axis (assumes the transforms yield NumPy arrays -- TODO confirm).
    inputs_name = [var.name for var in predictor.get_inputs()]
    inputs = {k: inputs[k][None,] for k in inputs_name}

    outputs = predictor.run(output_names=None, input_feed=inputs)[0]

    labels = _config["label_list"]
    # Single source of truth for the cutoff instead of a duplicated literal.
    threshold = _config["draw_threshold"]

    res = []
    for output in outputs:
        score = output[1]
        # Filter on score first so discarded rows never reach the label lookup.
        if not (score > threshold):
            continue

        cls_id = int(output[0])
        # A negative id would silently wrap around to the last label through
        # Python's negative indexing; treat such rows as invalid and skip them.
        if cls_id < 0:
            continue
        cls_name = labels[cls_id]

        # Only the top-left corner is clamped to the image origin.
        xmin = int(max(output[2], 0))
        ymin = int(max(output[3], 0))
        xmax = int(output[4])
        ymax = int(output[5])

        # Bbox receives (x, y, height, width, label, score).
        res.append(Bbox(xmin, ymin, ymax - ymin, xmax - xmin, cls_name, score))

    return res