# Source code for texteller.api.detection.detect
from typing import List
from onnxruntime import InferenceSession
from texteller.types import Bbox
from .preprocess import Compose
# Static settings for the LaTeX detection model. Only "preprocess" and
# "label_list" are read by latex_detect in this module; the remaining keys
# look like exported-model metadata (presumably from a PaddleDetection
# export -- confirm before relying on them).
_config = {
    "mode": "paddle",
    "draw_threshold": 0.5,
    "metric": "COCO",
    "use_dynamic_shape": False,
    "arch": "DETR",
    "min_subgraph_size": 3,
    # Ordered list of preprocessing steps handed to Compose.
    "preprocess": [
        {
            "interp": 2,
            "keep_ratio": False,
            "target_size": [1600, 1600],
            "type": "Resize",
        },
        # Zero mean / unit std with norm_type "none": presumably leaves pixel
        # values unchanged -- verify against the NormalizeImage implementation.
        {
            "mean": [0.0, 0.0, 0.0],
            "norm_type": "none",
            "std": [1.0, 1.0, 1.0],
            "type": "NormalizeImage",
        },
        {"type": "Permute"},
    ],
    # Class id (list index) -> class name, as looked up by latex_detect.
    "label_list": ["isolated", "embedding"],
}
[docs]
def latex_detect(img_path: str, predictor: InferenceSession) -> List[Bbox]:
    """
    Detect LaTeX formulas in an image and classify them as isolated or embedded.

    This function uses an ONNX model to detect LaTeX formulas in images. The model
    identifies two types of LaTeX formulas:

    - 'isolated': Standalone LaTeX formulas (typically displayed equations)
    - 'embedding': Inline LaTeX formulas embedded within text

    Only detections whose score is strictly greater than
    ``_config["draw_threshold"]`` are returned. The top-left corner of each
    box is clamped to be non-negative; the bottom-right corner is used as
    reported by the model.

    Args:
        img_path: Path to the input image file
        predictor: ONNX InferenceSession model for LaTeX detection

    Returns:
        List of Bbox objects representing the detected LaTeX formulas with their
        positions, classifications, and confidence scores

    Example:
        >>> from texteller.api import load_latexdet_model, latex_detect
        >>> model = load_latexdet_model()
        >>> bboxes = latex_detect("path/to/image.png", model)
    """
    transforms = Compose(_config["preprocess"])
    inputs = transforms(img_path)

    # Feed only the tensors the model declares, each with a leading batch
    # axis added (indexing with None inserts a new first dimension).
    inputs_name = [var.name for var in predictor.get_inputs()]
    inputs = {k: inputs[k][None,] for k in inputs_name}

    # Only the first model output is used; each row is read below as
    # [class_id, score, xmin, ymin, xmax, ymax].
    outputs = predictor.run(output_names=None, input_feed=inputs)[0]

    # Take the cutoff from the config instead of repeating the literal, so the
    # two cannot drift apart.
    threshold = _config["draw_threshold"]
    labels = _config["label_list"]

    res = []
    for output in outputs:
        score = output[1]
        # Filter first: rejected rows never reach the label lookup, so a
        # discarded row with an unexpected class id cannot raise or be
        # mislabelled. Written as "not >" so NaN scores are dropped, exactly
        # as the original "score > 0.5" test did.
        if not score > threshold:
            continue

        cls_name = labels[int(output[0])]
        xmin = int(max(output[2], 0))
        ymin = int(max(output[3], 0))
        xmax = int(output[4])
        ymax = int(output[5])
        # Height is passed before width, as in the original call --
        # presumably Bbox takes (x, y, h, w, label, confidence); confirm
        # against texteller.types.Bbox.
        res.append(Bbox(xmin, ymin, ymax - ymin, xmax - xmin, cls_name, score))
    return res