Files
decpdf.site/app/models/IDMatch.php
2026-01-18 00:53:18 +00:00

55 lines
1.4 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
/**
 * Finds document identifiers in free text by testing each word against the
 * regular expressions stored on IDMatch records.
 */
class IDMatch extends Model {
    /**
     * Returns the captures of the first stored pattern that matches a word.
     *
     * Records are loaded with find()->orderBy("weight")->all() and tried in
     * that order; for each record every word is tested, so an earlier record
     * wins over an earlier word. Each record's `regex` property is anchored
     * with ^ and $, meaning it has to match the whole word.
     *
     * @param array|string $words List of words, or raw text that is first
     *                            tokenised with get_words().
     * @return array|false [capture group 1, capture group 2] of the first
     *                     match (a group that is absent is null), or false
     *                     when nothing matches.
     */
    public static function find_docid($words) {
        if (!is_array($words)) {
            $words = self::get_words($words);
        }

        // Normalise every word once up front instead of once per record.
        $candidates = [];
        foreach ($words as $word) {
            $word = strtoupper($word);
            // "(" becomes "C" and "=" becomes "-", applied in that order.
            // NOTE(review): presumably compensates for OCR misreads - confirm.
            $word = str_replace(['(', '='], ['C', '-'], $word);
            // Collapse any run of dashes (including ones produced from "=").
            $candidates[] = preg_replace('/-{2,}/', '-', $word);
        }

        $matches = IDMatch::find()->orderBy("weight")->all();
        foreach ($matches as $match) {
            // NOTE(review): the stored regex is embedded verbatim, so an
            // unescaped "/" inside it would end the pattern early.
            $preg = '/^' . $match->regex . '$/';
            foreach ($candidates as $word) {
                if (preg_match($preg, $word, $m)) {
                    // preg_match leaves out trailing groups that did not
                    // take part in the match, hence the isset() checks.
                    return [
                        isset($m[1]) ? $m[1] : null,
                        isset($m[2]) ? $m[2] : null,
                    ];
                }
            }
        }
        return false;
    }

    /**
     * Splits raw text into words made only of ASCII letters, digits and "-".
     *
     * Double quotes are deleted rather than treated as separators, dash-like
     * characters are mapped to "-", runs of dashes are collapsed to one, and
     * every other character (whitespace and commas included) separates words.
     * This method produces no output.
     *
     * @param string $text Raw text. The Unicode dash handling assumes UTF-8.
     * @return array List of non-empty words; empty when the text has none.
     */
    public static function get_words($text) {
        $text = (string) $text;

        // Deleting quotes joins the text on either side of them, so this has
        // to happen before the separator replacement below.
        $text = str_replace('"', '', $text);

        // Dash look-alikes, written as UTF-8 byte escapes so the literals stay
        // readable in any editor or viewer.
        // NOTE(review): the two search literals previously used here were not
        // legible (an empty search makes str_replace a no-op); confirm this
        // list covers the characters that were intended.
        $dash_like = [
            "\xE2\x80\x90", // U+2010 hyphen
            "\xE2\x80\x91", // U+2011 non-breaking hyphen
            "\xE2\x80\x92", // U+2012 figure dash
            "\xE2\x80\x93", // U+2013 en dash
            "\xE2\x80\x94", // U+2014 em dash
            "\xE2\x88\x92", // U+2212 minus sign
            '~',
        ];
        $text = str_replace($dash_like, '-', $text);
        $text = preg_replace('/-{2,}/', '-', $text);

        // Anything that is not an ASCII letter, digit or "-" is a separator;
        // this also covers newlines, carriage returns and commas.
        $text = preg_replace('/[^a-zA-Z\-0-9]+/', ' ', $text);

        // NO_EMPTY drops the empty tokens that leading/trailing separators
        // (or an empty input) would otherwise produce.
        return preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    }
}