55 lines
1.4 KiB
PHP
55 lines
1.4 KiB
PHP
<?php
|
||
|
||
/**
 * Recognises document identifiers in free text using stored patterns.
 *
 * Every record returned by find() is expected to expose a `regex` property
 * holding a pattern fragment; records are fetched ordered by `weight`.
 */
class IDMatch extends Model {

    /**
     * Return the captures of the first stored pattern that matches a word.
     *
     * Patterns are tried in the order produced by orderBy("weight"); for each
     * pattern every word is tested in input order, and the first hit wins.
     * Each pattern must match the whole (normalised) word.
     *
     * @param string|array $words Raw text (split with get_words()) or a
     *                            ready-made list of words.
     * @return array|false Two-element list [group 1, group 2]; a group the
     *                     pattern did not define or capture is null. False
     *                     when no pattern matches any word.
     */
    public static function find_docid($words) {
        if (!is_array($words)) {
            $words = self::get_words($words);
        }

        // Normalisation is independent of the pattern, so do it once here
        // instead of once per pattern row.
        $candidates = [];
        foreach ($words as $word) {
            $candidates[] = self::normalize_word($word);
        }

        $matches = self::find()->orderBy("weight")->all();

        foreach ($matches as $match) {
            // NOTE(review): the stored fragment is embedded verbatim, so it
            // must not contain an unescaped "/" delimiter - confirm against
            // the stored data.
            $preg = '/^' . $match->regex . '$/';

            foreach ($candidates as $candidate) {
                if (preg_match($preg, $candidate, $m)) {
                    // Explicit isset() checks replace the old "@" suppression
                    // and also cover patterns without any capture group.
                    return [
                        isset($m[1]) ? $m[1] : null,
                        isset($m[2]) ? $m[2] : null,
                    ];
                }
            }
        }

        return false;
    }

    /**
     * Split free text into words made only of ASCII letters, digits and "-".
     *
     * Double quotes are removed without leaving a gap; en dash, em dash and
     * "~" become "-"; each "--" pair is collapsed to "-" (single pass). Every
     * other run of characters (line breaks, commas, punctuation, ...) acts as
     * one separator, so no separate handling of "\r", "\n" or "," is needed.
     *
     * @param string $text Raw input text.
     * @return array List of non-empty words; empty when the text has none.
     */
    public static function get_words($text) {
        // Quotes are dropped outright so that ab"cd stays one word.
        $text = str_replace("\"", "", $text);
        $text = str_replace(["–", "—", "~"], "-", $text);
        $text = str_replace("--", "-", $text);

        // Anything that is not a letter, digit or hyphen separates words.
        $text = preg_replace('/([^a-zA-Z\-0-9]+)/', ' ', $text);

        // NO_EMPTY: leading/trailing separators must not yield "" words.
        return preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Put a single word into the form the stored patterns are matched against.
     *
     * Upper-cases the word, then rewrites "(" to "C", "=" to "-" and each
     * "--" pair to "-", in that order.
     *
     * @param string $word Word to normalise.
     * @return string Normalised word.
     */
    private static function normalize_word($word) {
        // str_replace applies the pairs one after another, left to right,
        // so "==" first becomes "--" and is then collapsed to "-".
        return str_replace(["(", "=", "--"], ["C", "-", "-"], strtoupper($word));
    }
}
|