Initial import
This commit is contained in:
54
app/models/IDMatch.php
Normal file
54
app/models/IDMatch.php
Normal file
@@ -0,0 +1,54 @@
|
||||
<?php
|
||||
|
||||
/**
 * Extracts an identifier from free text by testing each word against the
 * regular expressions held by the stored IDMatch records.
 *
 * NOTE(review): the base class Model and the find()/orderBy()/all() chain are
 * defined outside this file; they are presumably an ORM query builder that
 * yields objects exposing a `regex` property -- confirm against Model.
 */
class IDMatch extends Model {

    /**
     * Returns the capture groups of the first stored pattern that matches a word.
     *
     * Records are tried in the order returned by orderBy("weight"); for each
     * record every word is tested in input order, and the first hit wins. Each
     * stored regex is anchored with ^...$ and wrapped in "/" delimiters, so it
     * must match the whole normalised word.
     *
     * NOTE(review): a stored regex containing an unescaped "/" will yield an
     * invalid pattern; this is unchanged from the previous behaviour.
     *
     * @param string|array $words Raw text (split with get_words()) or a list of words.
     * @return array|false [group 1, group 2], with null for a group that did
     *                     not take part in the match, or false if nothing matched.
     */
    public static function find_docid($words) {
        if (!is_array($words)) {
            $words = self::get_words($words);
        }

        // Normalisation does not depend on the pattern, so do it once up front
        // instead of once per (pattern, word) pair.
        $candidates = [];
        foreach ($words as $word) {
            $candidates[] = self::normalize_word($word);
        }
        if (!$candidates) {
            // No words means no possible match; skip loading the records.
            return false;
        }

        // Explicit class name (not self::) is kept so that the class the
        // lookup runs against is the same as before when called via a subclass.
        $matches = IDMatch::find()->orderBy("weight")->all();

        foreach ($matches as $match) {
            $preg = '/^' . $match->regex . '$/';

            foreach ($candidates as $candidate) {
                if (preg_match($preg, $candidate, $m)) {
                    // Patterns may define fewer than two groups; report the
                    // missing ones as null instead of suppressing a warning.
                    return [
                        isset($m[1]) ? $m[1] : null,
                        isset($m[2]) ? $m[2] : null,
                    ];
                }
            }
        }

        return false;
    }

    /**
     * Splits free text into words made only of ASCII letters, digits and "-".
     *
     * Double quotes are removed without leaving a gap, en/em dashes and "~"
     * become "-", and one pass of "--" to "-" is applied. Every run of any
     * other character (whitespace, newlines, commas, punctuation, non-ASCII
     * bytes) then acts as a single word separator.
     *
     * @param string $text Raw input text.
     * @return array List of non-empty words; empty when the text has none.
     */
    public static function get_words($text) {
        // Quotes are deleted rather than turned into separators, so a quoted
        // fragment stays attached to its neighbours (ab"cd -> abcd).
        $text = str_replace("\"", "", $text);

        // Fold the dash look-alikes into a plain hyphen before collapsing.
        $text = str_replace(["–", "—", "~"], "-", $text);
        $text = str_replace("--", "-", $text);

        // Everything that is not a letter, digit or hyphen separates words;
        // this also covers \r, \n and commas, which need no separate handling.
        $text = preg_replace('/([^a-zA-Z\-0-9]+)/', ' ', $text);

        // NO_EMPTY drops the empty entries that leading/trailing separators
        // (or an empty input) would otherwise produce.
        return preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Canonicalises one word before it is tested against the stored patterns:
     * upper-cases it, maps "(" to "C" and "=" to "-", then applies one pass
     * of "--" to "-".
     *
     * NOTE(review): the "(" to "C" mapping looks like an OCR/typo correction --
     * confirm the intent with the data source.
     *
     * @param string $word
     * @return string
     */
    private static function normalize_word($word) {
        $word = strtoupper($word);
        $word = str_replace("(", "C", $word);
        $word = str_replace("=", "-", $word);
        return str_replace("--", "-", $word);
    }

}
|
||||
Reference in New Issue
Block a user