Skip to content

Instantly share code, notes, and snippets.

@masterfermin02
Last active September 10, 2025 01:56
Show Gist options
  • Select an option

  • Save masterfermin02/54e60382e8026538a78ce0ef09bcd7f4 to your computer and use it in GitHub Desktop.

Select an option

Save masterfermin02/54e60382e8026538a78ce0ef09bcd7f4 to your computer and use it in GitHub Desktop.
<?php
/**
 * Locate the amount that follows a "Total a pagar" label in free-form text.
 *
 * Three strategies are tried in order; the first one whose captured text
 * parseMoneyAmount() accepts wins:
 *   1. label and amount on the same line   ("Total a pagar: RD$ 1,234.56")
 *   2. amount on the line right after it   ("Total a pagar:" / "RD$ 1.234,56")
 *   3. per-line fallback: first amount found AFTER the label on its own line,
 *      otherwise the first amount on any of the following three lines.
 *
 * All matching is done on the original text (case-insensitive, tolerant of
 * stray accents in the label), so the returned 'raw' keeps the input's exact
 * spelling and no offset translation between two strings is needed.
 *
 * @param string $text Text to scan; expected to be valid UTF-8.
 * @return array{label: string, raw: string, value: float}|null
 *         Null when no label is found or no amount near it can be parsed.
 */
function extractTotalAPagar(string $text): ?array
{
    // Accent-tolerant vowels written as code points so the pattern does not
    // depend on this file's encoding (a/à-å, o/ò-ö; /i covers upper case).
    $a = '[a\x{E0}-\x{E5}]';
    $o = '[o\x{F2}-\x{F6}]';
    // The look-behind keeps "Subtotal a pagar" from being read as the label.
    $label = '(?<!\pL)t' . $o . 't' . $a . 'l\s*' . $a . '\s*p' . $a . 'g' . $a . 'r';

    // Optional currency marker: "$", "RD$", "US$" or one of the ISO-like codes
    // that parseMoneyAmount() knows how to strip.
    $currency = '(?:[a-z]{0,3}\$|usd|dop|ars|mxn|cop|pen|clp)';
    // Either space-grouped thousands ("1 234 567,89") or a run of digits, dots
    // and commas. It always starts and ends on a digit and never crosses a line
    // break, so trailing punctuation and digits on the next line stay out.
    $number = '(?:\d{1,3}(?:\h\d{3})+(?!\d)(?:[.,]\d+)?|\d(?:[\d.,]*\d)?)';
    // Single capture group = currency (if any) + number, i.e. the "raw" value.
    $amount = '((?:' . $currency . '\h*)?' . $number . ')\b';

    $build = static function (string $raw): ?array {
        $raw = trim($raw);
        $value = parseMoneyAmount($raw);

        return $value === null
            ? null
            : ['label' => 'Total a pagar', 'raw' => $raw, 'value' => $value];
    };

    // 1) same line, then 2) next line. Two passes so that a same-line hit
    // anywhere in the text still takes priority over a next-line hit.
    $anchored = [
        '/' . $label . '\h*[:\-]?\h*' . $amount . '/iu',
        '/' . $label . '\h*[:\-]?\h*\R\h*' . $amount . '/iu',
    ];
    foreach ($anchored as $pattern) {
        if (preg_match($pattern, $text, $m) === 1) {
            $result = $build($m[1]);
            if ($result !== null) {
                return $result;
            }
        }
    }

    // 3) Fallback for text sitting between label and amount
    //    ("Total a pagar en pesos ..... RD$ 500").
    $lines = preg_split('/\R/u', $text);
    if ($lines === false) {
        // PCRE failure (e.g. invalid UTF-8): nothing reliable to scan.
        return null;
    }

    $labelPattern = '/' . $label . '/iu';
    // Do not start in the middle of a word or of a longer number.
    $loosePattern = '/(?<![\w.,])' . $amount . '/iu';
    $lineCount = count($lines);

    foreach ($lines as $i => $line) {
        if (preg_match($labelPattern, $line, $hit, PREG_OFFSET_CAPTURE) !== 1) {
            continue;
        }

        // Only the part of the label line that comes AFTER the label is a
        // candidate; numbers before it (invoice no., dates) are unrelated.
        $candidates = [(string) substr($line, $hit[0][1] + strlen($hit[0][0]))];
        for ($j = $i + 1; $j <= $i + 3 && $j < $lineCount; $j++) {
            $candidates[] = $lines[$j];
        }

        foreach ($candidates as $candidate) {
            if (preg_match($loosePattern, $candidate, $m) === 1) {
                $result = $build($m[1]);
                if ($result !== null) {
                    return $result;
                }
            }
        }
    }

    return null;
}
/**
 * Fold accented Latin letters to their plain ASCII base letter for matching
 * (á -> a, Ñ -> N, ü -> u, ...).
 *
 * The common Western-European letters are folded with a fixed table, and
 * combining marks (decomposed "a" + U+0301) are dropped, so the result does
 * not depend on the process locale or on the iconv implementation.
 * iconv transliteration is used only for whatever non-ASCII characters are
 * still left; its output for those is platform-dependent.
 *
 * @param string $s UTF-8 text.
 * @return string Folded text; if iconv fails, the table-folded text is
 *                returned as-is (it may still contain non-ASCII bytes).
 */
function removeAccents(string $s): string
{
    // Pure ASCII (the usual case) has nothing to fold.
    if (preg_match('/[^\x00-\x7F]/', $s) !== 1) {
        return $s;
    }

    static $fold = [
        'á' => 'a', 'à' => 'a', 'â' => 'a', 'ä' => 'a', 'ã' => 'a', 'å' => 'a',
        'Á' => 'A', 'À' => 'A', 'Â' => 'A', 'Ä' => 'A', 'Ã' => 'A', 'Å' => 'A',
        'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
        'É' => 'E', 'È' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'í' => 'i', 'ì' => 'i', 'î' => 'i', 'ï' => 'i',
        'Í' => 'I', 'Ì' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'ó' => 'o', 'ò' => 'o', 'ô' => 'o', 'ö' => 'o', 'õ' => 'o',
        'Ó' => 'O', 'Ò' => 'O', 'Ô' => 'O', 'Ö' => 'O', 'Õ' => 'O',
        'ú' => 'u', 'ù' => 'u', 'û' => 'u', 'ü' => 'u',
        'Ú' => 'U', 'Ù' => 'U', 'Û' => 'U', 'Ü' => 'U',
        'ñ' => 'n', 'Ñ' => 'N',
        'ç' => 'c', 'Ç' => 'C',
        'ý' => 'y', 'ÿ' => 'y', 'Ý' => 'Y',
    ];
    $s = strtr($s, $fold);

    // Decomposed input: keep the base letter, drop the combining diacritic.
    // preg_replace() yields null on invalid UTF-8; keep the string in that case.
    $withoutMarks = preg_replace('/[\x{0300}-\x{036F}]+/u', '', $s);
    if ($withoutMarks !== null) {
        $s = $withoutMarks;
    }

    if (preg_match('/[^\x00-\x7F]/', $s) !== 1) {
        return $s;
    }

    // Last resort for anything the table does not cover.
    $t = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);

    return $t !== false ? $t : $s;
}
/**
 * Cut a piece out of $text using a BYTE offset and BYTE length, such as the
 * ones reported by preg_match() with PREG_OFFSET_CAPTURE.
 *
 * The byte positions are first converted to character positions with
 * mb_strcut()/mb_strlen(), and the actual extraction is then done with
 * mb_substr() on whole UTF-8 characters.
 *
 * @param string $text   UTF-8 text to slice.
 * @param int    $start  Byte offset of the slice; negative yields ''.
 * @param int    $length Byte length of the slice; zero or negative yields ''.
 * @return string The extracted text, or '' for an unusable offset/length.
 */
function matchRawSlice(string $text, int $start, int $length): string
{
    if ($start < 0 || $length <= 0) {
        return '';
    }

    // Everything before the byte offset, measured in characters, is the
    // character index at which the slice begins.
    $leading = mb_strcut($text, 0, $start, 'UTF-8');
    $charStart = mb_strlen($leading, 'UTF-8');

    // The requested byte window, measured in characters, is how many
    // characters to take.
    $window = mb_strcut($text, $start, $length, 'UTF-8');
    $charCount = mb_strlen($window, 'UTF-8');

    return mb_substr($text, $charStart, $charCount, 'UTF-8');
}
/**
 * Parse a money string such as "RD$ 1,234.56" or "$ 1.234,56" into a float
 * (1234.56 for both).
 *
 * Currency markers (RD$, $, USD, DOP, ARS, MXN, COP, PEN, CLP) and all
 * whitespace, including no-break spaces used as group separators, are
 * stripped first. The separators are then interpreted as follows:
 *   - dot AND comma present: whichever appears last is the decimal mark,
 *     the other one is a thousands separator;
 *   - one kind only, repeated ("1.234.567", "1,234,567"): thousands separator;
 *   - a single comma ("1234,56"): decimal mark;
 *   - a single dot ("1234.56"): decimal mark.
 * A lone "1,234" is therefore read as 1.234; that case is ambiguous without
 * knowing the locale and keeps the historical comma-is-decimal reading.
 *
 * @param string $raw Text holding one amount, optionally with a currency marker.
 * @return float|null The parsed amount, or null when the text is not a number.
 */
function parseMoneyAmount(string $raw): ?float
{
    // \h additionally covers U+00A0 / U+202F, which \s does not match.
    $s = preg_replace('/(?i)(rd\$|usd|\$|dop|ars|mxn|cop|pen|clp|\s|\h)+/u', '', $raw);
    if ($s === null || $s === '') {
        // null: PCRE error (e.g. invalid UTF-8); '': nothing but currency/space.
        return null;
    }

    $lastDot = strrpos($s, '.');
    $lastComma = strrpos($s, ',');

    if ($lastDot !== false && $lastComma !== false) {
        if ($lastComma > $lastDot) {
            // 1.234,56 -> drop grouping dots, then comma becomes the decimal point.
            $s = str_replace(['.', ','], ['', '.'], $s);
        } else {
            // 1,234.56 -> drop grouping commas, dot already is the decimal point.
            $s = str_replace(',', '', $s);
        }
    } elseif ($lastComma !== false) {
        $s = substr_count($s, ',') > 1
            ? str_replace(',', '', $s)   // 1,234,567 -> grouping only
            : str_replace(',', '.', $s); // 1234,56   -> decimal comma
    } elseif ($lastDot !== false && substr_count($s, '.') > 1) {
        // 1.234.567 -> grouping only
        $s = str_replace('.', '', $s);
    }

    return is_numeric($s) ? (float) $s : null;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment