Last active
September 10, 2025 01:56
-
-
Save masterfermin02/54e60382e8026538a78ce0ef09bcd7f4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * Locate the amount that belongs to a "Total a pagar" label in free-form text.
 *
 * Three strategies are tried in order; the first one whose candidate parses wins:
 *   1. label and amount on the same line      ("Total a pagar: RD$ 1,234.56")
 *   2. amount on the line right after the label
 *   3. line-by-line fallback: for each line carrying the label, look after the label,
 *      then before it, then in the following three lines.
 *
 * All matching is done on the ORIGINAL text. Matching on a transliterated copy and
 * re-using its byte offsets on the original is unsafe because transliteration can
 * change byte lengths, which shifts the slice and silently truncates the amount.
 *
 * @param string $text Raw text to scan.
 *
 * @return array{label: string, raw: string, value: float}|null
 *         'raw' is the trimmed substring of $text that was parsed, 'value' is the
 *         result of parseMoneyAmount(); null when no parsable amount is found.
 */
function extractTotalAPagar(string $text): ?array
{
    // Horizontal whitespace only: an amount must never be glued together across lines.
    $hs = '[^\S\r\n]';

    // Label with accent-tolerant vowels, so no normalized copy of the text is needed.
    $label = 't[oóòö]t[aáàä]l\s*[aáàä]\s*p[aáàä]g[aáàä]r';

    // Digits with "." / "," separators; a blank is accepted only as a thousands
    // separator (followed by exactly three digits). A single leading separator
    // (".50") is allowed when it is not part of a run of dots/commas (dot leaders).
    $number = '(?:(?<![.,\d])[.,])?\d+(?:[.,]\d+|' . $hs . '\d{3}(?!\d))*';

    // Optional currency prefix such as "$", "RD$", "US$" followed by the number.
    $amount = '(?:[a-z]{0,3}\$)?' . $hs . '*' . $number;

    // Turn a candidate substring into the result array, or null if it does not parse.
    $toResult = static function (string $candidate): ?array {
        $raw = trim($candidate);
        if ($raw === '') {
            return null;
        }
        $value = parseMoneyAmount($raw);
        if ($value === null) {
            return null;
        }

        return ['label' => 'Total a pagar', 'raw' => $raw, 'value' => $value];
    };

    // 1) Same line, 2) next line. The whole amount (currency + number) is ONE capture
    //    group, so an absent currency prefix cannot produce an unmatched-group offset.
    $prefix = '/' . $label . $hs . '*[:\-]?' . $hs . '*';
    $anchoredPatterns = [
        $prefix . '(' . $amount . ')\b/iu',
        $prefix . '\R' . $hs . '*(' . $amount . ')\b/iu',
    ];
    foreach ($anchoredPatterns as $pattern) {
        if (preg_match($pattern, $text, $m) === 1) {
            $result = $toResult($m[1]);
            if ($result !== null) {
                return $result;
            }
        }
    }

    // 3) Line-by-line fallback.
    $lines = preg_split('/\R/u', $text);
    if ($lines === false) {
        // preg_split fails on invalid UTF-8; split on raw line breaks instead so that
        // individually valid lines can still be inspected.
        $lines = explode("\n", str_replace(["\r\n", "\r"], "\n", $text));
    }

    $labelPattern = '/' . $label . '/iu';
    $amountPattern = '/' . $amount . '\b/iu';

    foreach ($lines as $i => $line) {
        // Same label test as before (transliterated line) to stay backward-compatible.
        if (preg_match('/total\s*a\s*pagar/i', removeAccents($line)) !== 1) {
            continue;
        }

        // Prefer what FOLLOWS the label so that "1. Total a pagar: 500" yields 500,
        // then fall back to what precedes it. Offsets here come from $line itself,
        // so byte-based substr()/strlen() are the correct tools.
        if (preg_match($labelPattern, $line, $lm, PREG_OFFSET_CAPTURE) === 1) {
            $labelStart = $lm[0][1];
            $labelEnd = $labelStart + strlen($lm[0][0]);
            $candidates = [substr($line, $labelEnd), substr($line, 0, $labelStart)];
        } else {
            $candidates = [$line];
        }

        // Then the next three lines, in order.
        $candidates = array_merge($candidates, array_slice($lines, $i + 1, 3));

        foreach ($candidates as $candidate) {
            if (preg_match($amountPattern, $candidate, $mm) === 1) {
                $result = $toResult($mm[0]);
                if ($result !== null) {
                    return $result;
                }
            }
        }
    }

    return null;
}
/**
 * Transliterate a UTF-8 string to ASCII so that accented letters can be matched as
 * their plain counterparts (intended effect: á -> a, é -> e, ...).
 *
 * The conversion is delegated to iconv() with the "ASCII//TRANSLIT//IGNORE" target.
 * NOTE(review): the exact transliteration output depends on the platform's iconv
 * implementation and locale — confirm on the deployment target.
 *
 * @param string $s UTF-8 input.
 *
 * @return string The converted string, or $s unchanged when iconv() reports failure.
 */
function removeAccents(string $s): string
{
    $ascii = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);

    if ($ascii === false) {
        // Conversion failed: hand back the input untouched rather than an empty value.
        return $s;
    }

    return $ascii;
}
/**
 * Extract a piece of $text addressed by a BYTE offset and BYTE length (as reported by
 * preg_match() with PREG_OFFSET_CAPTURE), returning it as whole UTF-8 characters.
 *
 * The byte window is first translated into a character position and a character
 * count via mb_strcut()/mb_strlen(), and the result is then taken with mb_substr().
 *
 * @param string $text   Text to slice.
 * @param int    $start  Byte offset of the slice.
 * @param int    $length Byte length of the slice.
 *
 * @return string The slice, or '' when $start is negative or $length is not positive.
 */
function matchRawSlice(string $text, int $start, int $length): string
{
    $isUsableWindow = $start >= 0 && $length > 0;
    if (!$isUsableWindow) {
        return '';
    }

    // Everything before the window, measured in characters, gives the start position.
    $leading = mb_strcut($text, 0, $start, 'UTF-8');
    $charOffset = mb_strlen($leading, 'UTF-8');

    // The window itself, measured in characters, gives the number of characters to take.
    $window = mb_strcut($text, $start, $length, 'UTF-8');
    $charCount = mb_strlen($window, 'UTF-8');

    return mb_substr($text, $charOffset, $charCount, 'UTF-8');
}
/**
 * Parse a formatted money string such as "RD$ 1,234.56" or "$ 1.234,56" into a float.
 *
 * Currency markers (RD$, $, USD, DOP, ARS, MXN, COP, PEN, CLP) and all whitespace are
 * stripped first. The decimal separator is then decided as follows:
 *   - both "." and "," present: whichever occurs LAST is the decimal separator and
 *     the other one is a thousands separator (1,234.56 and 1.234,56 both -> 1234.56);
 *   - only commas: a single comma is a decimal separator (1234,56 -> 1234.56),
 *     several commas are thousands separators (1,234,567 -> 1234567);
 *   - only dots: a single dot is a decimal separator (12.50 -> 12.5),
 *     several dots are thousands separators (1.234.567 -> 1234567).
 *
 * @param string $raw Text containing one amount, optionally with a currency marker.
 *
 * @return float|null The parsed amount, or null when the cleaned text is not numeric.
 */
function parseMoneyAmount(string $raw): ?float
{
    // Drop currency words/symbols and every whitespace character in one pass.
    $s = preg_replace('/(?i)(rd\$|usd|\$|dop|ars|mxn|cop|pen|clp|\s)+/u', '', trim($raw));
    if ($s === null || $s === '') {
        // null: PCRE error (e.g. invalid UTF-8); '': nothing left to parse.
        return null;
    }

    $lastDot = strrpos($s, '.');
    $lastComma = strrpos($s, ',');

    if ($lastDot !== false && $lastComma !== false) {
        // Mixed separators: the right-most one marks the decimals.
        if ($lastComma > $lastDot) {
            $s = str_replace('.', '', $s);
            $s = str_replace(',', '.', $s);
        } else {
            $s = str_replace(',', '', $s);
        }
    } elseif ($lastComma !== false) {
        // Commas only: repeated commas can only be thousands separators.
        $s = substr_count($s, ',') > 1
            ? str_replace(',', '', $s)
            : str_replace(',', '.', $s);
    } elseif ($lastDot !== false && substr_count($s, '.') > 1) {
        // Dots only, repeated: thousands separators.
        $s = str_replace('.', '', $s);
    }

    return is_numeric($s) ? (float) $s : null;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment