Initial import

This commit is contained in:
2026-01-18 00:53:18 +00:00
parent fb78291fb1
commit 940191502e
115 changed files with 15524 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
<?php
/**
 * Serves the cover image of a revision: the file "cover.jpg" (or a scaled
 * "cover-<size>.jpg") stored next to the revision's "doc.pdf".
 */
class CoverController {
    /**
     * Return the cover image for a revision, generating it on demand.
     *
     * An already existing image file is returned directly. Otherwise the
     * cover is extracted from page 0 of the PDF and, when a smaller size was
     * requested, scaled and saved under the size-specific name.
     *
     * @param object $_request Request object; only header("If-None-Match") is read.
     * @param mixed  $id       Revision id.
     * @param string $filename Unused here (part of the route).
     * @param mixed  $size     Requested size passed to Image::scale(); any falsy
     *                         value means "unscaled".
     * @return mixed An image object, or a [status, message, headers?] array
     *               for the 304 and 404 cases.
     */
    static function get_cover($_request, $id, $filename, $size = false) {
        // `new` never yields false, so the former `$rev === false` guard was
        // unreachable and has been dropped.
        $rev = new Revision($id);
        $path = $rev->path();

        $pdf = new PDF($path . "/doc.pdf");
        if (!$pdf->exists()) {
            return [404, "Not Found"];
        }

        if (!$size) {
            // Normalise 0, "0" and "" to false. Previously such values
            // selected cover.jpg here but later fell through to scale(0),
            // overwriting the full-size cover.
            $size = false;
            $imgpath = sprintf("%s/cover.jpg", $path);
        } else {
            $imgpath = sprintf("%s/cover-%d.jpg", $path, $size);
        }

        $file = new File($imgpath);
        if ($file->exists()) {
            $notmod = self::not_modified($_request, $file);
            if ($notmod !== null) {
                return $notmod;
            }
            return new Image($file);
        }

        // Make sure the full-size cover exists, extracting it if necessary.
        $cover = new Image($path . "/cover.jpg");
        if (!$cover->exists()) {
            $pdf->extract_page(0, $cover->path());
            $cover = new Image($path . "/cover.jpg");
        }
        if (!$cover->exists()) {
            return [404, "Not Found"];
        }

        // No scaling requested, or the request is not smaller than the
        // original: serve the original cover.
        if ($size === false || $size >= $cover->width()) {
            $notmod = self::not_modified($_request, $cover);
            if ($notmod !== null) {
                return $notmod;
            }
            return $cover;
        }

        $cover->scale($size);
        return $cover->save($imgpath, "image/jpeg");
    }

    /**
     * Build a 304 response when the client's If-None-Match header equals the
     * quoted hash() of the given file/image.
     *
     * @param object $_request Request object providing header().
     * @param object $resource Object exposing hash().
     * @return array|null The 304 response array, or null when the tag differs.
     */
    private static function not_modified($_request, $resource) {
        $etag = "\"" . $resource->hash() . "\"";
        if ($etag === $_request->header("If-None-Match")) {
            return [304, "Not Modified", [
                "ETag" => $etag,
                "Cache-Control" => "public, max-age=86400, must-revalidate",
            ]];
        }
        return null;
    }
}

View File

@@ -0,0 +1,379 @@
<?php
/**
 * Page and JSON endpoints for documents: browsing, editing, product links,
 * merging/splitting revisions, metadata and attachments.
 */
class DocumentController {
    /**
     * Render the "documents" view for a product.
     *
     * @param mixed $pid Product id (default 0).
     */
    public static function browse($pid = 0) {
        $product = new Product($pid);
        return blade("documents", ["product" => $product]);
    }

    /** Render the "document" view for one document after calling load() on it. */
    public static function show($id) {
        $doc = new Document($id);
        $doc->load();
        return blade("document", [
            "doc" => $doc
        ]);
    }

    /** Return the document with the given id. */
    public static function api_get($id) {
        return new Document($id);
    }

    /**
     * Update a document from $_POST and synchronise its product links.
     *
     * Every posted key except "products" is trimmed and assigned to the
     * document. "products" is a comma separated list of product ids: links
     * not in the list are deleted, missing ones are created.
     *
     * NOTE(review): posted keys are assigned to the model without a
     * whitelist — confirm this endpoint is only reachable by trusted users.
     *
     * @return Document The document (unchanged when it is not valid()).
     */
    public static function api_set($id) {
        $doc = new Document($id);
        if ($doc->valid()) {
            foreach ($_POST as $k => $v) {
                if ($k != "products") {
                    $doc->$k = trim($v);
                }
            }
            $doc->save();

            // Only touch the links when the client actually sent the list.
            // A missing key used to be exploded into [""], which removed all
            // links and created one with an empty product id.
            if (array_key_exists("products", $_POST)) {
                $pids = self::split_id_list($_POST['products']);
                $dpl = DocProduct::find([["document", "=", $id]])->all();
                foreach ($dpl as $dp) {
                    $e = array_search($dp->product, $pids);
                    if ($e !== false) {
                        // Link already exists: keep it, nothing to create.
                        unset($pids[$e]);
                        continue;
                    }
                    $dp->delete();
                }
                foreach ($pids as $pid) {
                    $dp = new DocProduct;
                    $dp->document = $id;
                    $dp->product = $pid;
                    $dp->save();
                }
                $doc->cache_invalidate("products");
            }
        }
        return $doc;
    }

    /** Return the "products" property of a document. */
    public static function api_get_products($id) {
        $doc = new Document($id);
        return $doc->products;
    }

    /**
     * Re-point one document/product link from $_POST['from'] to $_POST['to'].
     *
     * @return array Always an empty array.
     */
    public static function api_move($id) {
        $doc = new Document($id);
        $from = $_POST['from'];
        $to = $_POST['to'];
        // Fixed: this used to call DocProduct(...) as a plain function, which
        // is a fatal error; the lookup goes through DocProduct::find().
        $dp = DocProduct::find([["document", "=", $id], ["product", "=", $from]])->first();
        if ($dp) {
            $dp->product = $to;
            $dp->save();
        }
        $doc->cache_invalidate("products");
        return [];
    }

    /** Delete the link between a document and a product, then go back(). */
    public static function del_docproduct($doc, $prod) {
        $dp = DocProduct::find([["document", "=", $doc], ["product", "=", $prod]])->first();
        if ($dp) {
            $dp->delete();
        }
        return back();
    }

    /** Find the first document whose internal_id equals the trimmed $id. */
    public static function get_by_id($id) {
        return Document::find([["internal_id", "=", trim($id)]])->first();
    }

    /**
     * Merge document $id into the document $_POST['to']: all revisions are
     * moved to the target and the source document is deleted.
     *
     * @return mixed A fresh Document for the target, or [] when nothing was done.
     */
    public static function api_merge($id) {
        $from = new Document($id);
        $to = new Document($_POST['to']);
        if ($from->id == $to->id) return [];
        // The old `if ($from && $to)` was always true for objects. Refuse to
        // merge unless both ends pass valid(), otherwise the source would be
        // deleted with its revisions pointing at a non-existent target.
        if (!$from->valid() || !$to->valid()) return [];

        // Iterate the full result set. The previous
        // `for ($rev = first(); $rev = next(); )` loop overwrote the value
        // from first() before the body ran, skipping the first revision.
        $revs = Revision::find([["document", "=", $from->id]])->all();
        foreach ($revs as $rev) {
            $rev->document = $to->id;
            $rev->save();
        }
        $from->delete();
        return new Document($to->id); // Force refresh
    }

    /**
     * Handle a drag-and-drop action posted by the UI.
     *
     * Supported combinations of src_type/dst_type:
     *  - document -> document: merge src into dst;
     *  - document -> product:  copy (when copy == "true") or move the link
     *    from product src_extra to the target product;
     *  - product  -> product:  make dst the parent of src.
     *
     * @param object $_request Request object; values are read with post().
     * @return array [] when nothing was done, else [200, ["didit" => "true"]].
     */
    public static function api_drag_drop($_request) {
        $src_id = $_request->post("src_id");
        $dst_id = $_request->post("dst_id");
        $src_type = $_request->post("src_type");
        $dst_type = $_request->post("dst_type");
        $src_extra = $_request->post("src_extra");
        $dst_extra = $_request->post("dst_extra");
        $copy = $_request->post("copy") == "true";

        if (($src_type == "document") && ($dst_type == "document")) {
            // Merge src into dst
            $src = new Document($src_id);
            $dst = new Document($dst_id);
            if ($src->id == $dst->id) return [];
            // Same safety net as api_merge(): never delete the source when
            // either end is not a valid document.
            if (!$src->valid() || !$dst->valid()) return [];
            foreach ($src->revisions as $r) {
                $r->document = $dst->id;
                $r->save();
            }
            $src->delete();
            $dst->cache_invalidate("revisions");
        }

        if (($src_type == "document") && ($dst_type == "product")) {
            if ($copy) {
                $dp = new DocProduct();
                $dp->document = $src_id;
                $dp->product = $dst_id;
                $dp->save();
            } else {
                // Move document into product
                $dpl = DocProduct::find([["document", "=", $src_id], ["product", "=", $src_extra]])->all();
                foreach ($dpl as $dp) {
                    $dp->product = $dst_id;
                    $dp->save();
                }
                $prod = new Product($src_extra);
                $prod->cache_invalidate("documents");
            }
            $doc = new Document($src_id);
            $doc->cache_invalidate("products");
            $prod = new Product($dst_id);
            $prod->cache_invalidate("documents");
        }

        if (($src_type == "product") && ($dst_type == "product")) {
            if ($src_id == $dst_id) return [];
            // Move product into product
            $sp = new Product($src_id);
            $dp = new Product($dst_id);
            $sp->parent = $dp->id;
            $sp->save();
            $sp->cache_invalidate("parent");
            $sp->cache_invalidate("children");
            $dp->cache_invalidate("parent");
            $dp->cache_invalidate("children");
        }
        return [200, ["didit" => "true"]];
    }

    /**
     * Concatenate the PDFs of all revisions of a document into a new revision
     * (revno "NEW") using pdftk, then redirect to the document page.
     *
     * On a non-zero pdftk exit status, stderr is printed and the script exits.
     * NOTE(review): in that case the freshly saved "NEW" revision is left
     * behind without a PDF.
     */
    public static function merge($id) {
        $doc = new Document($id);
        $proc = new Process("pdftk");
        foreach ($doc->revisions as $rev) {
            $proc->arg($rev->path() . "/doc.pdf");
        }
        $nr = new Revision;
        $nr->document = $doc->id;
        $nr->revno = "NEW";
        $nr->save();
        $out = new File($nr->path() . "/doc.pdf");
        $out->parent()->mkdir();
        // pdftk needs the "cat" operation to join several inputs; without an
        // operation it runs in filter mode, which accepts one input only.
        $proc->arg("cat");
        $proc->arg("output");
        $proc->arg((string)$out);
        $r = $proc->execute();
        if ($r != 0) {
            print("<pre>");
            // Escape the tool output so it cannot inject markup into the page.
            print(htmlspecialchars(print_r($proc->stderr(), true)));
            print("</pre>");
            exit(0);
        }
        return redirect("/document/" . $doc->id);
    }

    /** Queue a GeminiJob for the document and redirect back to its page. */
    public static function create_overview($id) {
        $job = new GeminiJob($id, "document:$id");
        $jobid = $job->queue();
        flash("success", "Job queued as ID " . $jobid);
        return redirect("/document/" . $id);
    }

    /**
     * Complete a title fragment: look for distinct titles, subtitles and
     * subsubtitles starting with the posted "title" value.
     *
     * @return mixed The single matching text, [404, ...] when there is no
     *               match, or [413, ...] when more than one row matches.
     */
    public static function api_get_title_fragment($_request) {
        $q = $_request->post("title");
        $db = DB::getInstance();
        $q1 = $db->query("
            SELECT
                DISTINCT title
            FROM (
                SELECT
                    DISTINCT title
                FROM
                    document
                WHERE
                    title LIKE :s
                UNION SELECT
                    DISTINCT subtitle AS title
                FROM
                    document
                WHERE
                    subtitle LIKE :s
                UNION SELECT
                    DISTINCT subsubtitle AS title
                FROM
                    document
                WHERE subsubtitle LIKE :s
            ) AS DERIVED", ["s" => $q . "%"]);
        $o = $db->all($q1);
        if ($o->count() == 0) {
            return [404, "Not Found"];
        }
        if ($o->count() != 1) {
            return [413, "Content Too Large"];
        }
        return $o[0]->title;
    }

    /**
     * Split a document so that each revision after the first gets its own
     * duplicate document, whose subsubtitle is suffixed with " - <n>".
     */
    public static function separate($id) {
        $doc = new Document($id);
        $firstprod = $doc->products[0];
        $count = 0;
        foreach ($doc->revisions as $rev) {
            $count++;
            if ($count == 1) {
                // The first revision stays with the original document.
                continue;
            }
            $newdoc = $doc->duplicate();
            $newdoc->subsubtitle .= " - $count";
            // duplicate() saves before the suffix is added, so save again;
            // otherwise the new subsubtitle was never stored.
            $newdoc->save();
            $rev->document = $newdoc->id;
            $rev->save();
        }
        return redirect("/documents/" . $firstprod);
    }

    /** List the DocMeta rows of a document ordered by "metadata". */
    public static function api_get_metadata($id) {
        return DocMeta::find([["document", "=", $id]])->orderBy("metadata")->all();
    }

    /** Add the metadata type put("item_id") with an empty value; return the list. */
    public static function api_new_metadata($_request, $id) {
        $doc = new Document($id);
        $doc->set_metadata($_request->put("item_id"), "");
        return DocMeta::find([["document", "=", $id]])->orderBy("metadata")->all();
    }

    /** Set metadata $metadata to post('data'); return the updated list. */
    public static function api_set_metadata($_request, $id, $metadata) {
        $doc = new Document($id);
        $doc->set_metadata($metadata, $_request->post('data'));
        return DocMeta::find([["document", "=", $id]])->orderBy("metadata")->all();
    }

    /** Remove metadata $metadata from the document; return the updated list. */
    public static function api_delete_metadata($id, $metadata) {
        $doc = new Document($id);
        $doc->remove_metadata($metadata);
        return DocMeta::find([["document", "=", $id]])->orderBy("metadata")->all();
    }

    /**
     * Return the metadata types not yet used by the document as a Collection
     * of {key: type id, value: type name} objects sorted by "value".
     */
    public static function api_available_metadata($id) {
        $exist = DocMeta::find([["document", "=", $id]])->orderBy("metadata")->all();
        $metas = MetaType::find()->all();
        $out = new Collection();
        foreach ($metas as $meta) {
            $e = false;
            foreach ($exist as $ex) {
                if ($ex->metadata == $meta->id) {
                    $e = true;
                    break;
                }
            }
            if (!$e) {
                $cl = new stdClass;
                $cl->key = $meta->id;
                $cl->value = $meta->name;
                $out->push($cl);
            }
        }
        $out->sort("value");
        return $out;
    }

    /** Remove metadata $metadata and redirect to the document page. */
    public static function delete_metadata($id, $metadata) {
        $doc = new Document($id);
        $doc->remove_metadata($metadata);
        return redirect("/document/" . $id);
    }

    /** Return {id, docid} where docid is the result of Document::guess_docid(). */
    public static function api_guess_docid($id) {
        $doc = new Document($id);
        $docid = $doc->guess_docid();
        return new Collection(["id" => $id, "docid" => $docid]);
    }

    /**
     * Return the attachment of a document whose basename equals $filename,
     * with a Content-Disposition "attachment" header set.
     *
     * @return mixed The file object, or false when no attachment matches.
     */
    public static function download_attachment($id, $filename) {
        $doc = new Document($id);
        $atts = $doc->get_attachments();
        foreach ($atts as $f) {
            if ($f->basename() == $filename) {
                $f->set_header("Content-Disposition", "attachment; filename=\"$filename\"");
                return $f;
            }
        }
        return false;
    }

    /** Render the "upload.attachment" view for a document. */
    public static function upload_attachment($id) {
        $doc = new Document($id);
        return blade("upload.attachment", ["doc" => $doc]);
    }

    /**
     * Store all uploaded "file" entries under ROOT/attachments/<document id>/
     * and redirect to the document page.
     */
    public static function do_upload_attachment($_request, $id) {
        $doc = new Document($id);
        $dir = ROOT . "/attachments/" . $doc->id;
        // mkdir() warns when the directory already exists (every upload after
        // the first) and fails when the parent is missing.
        if (!is_dir($dir)) {
            mkdir($dir, 0777, true);
        }
        $f = 0;
        while (($file = $_request->file("file", $f)) !== false) {
            // The client controls 'name'; keep only its last path component so
            // "../" sequences cannot place the file outside the directory.
            $name = basename($file['name']);
            if ($name !== "") {
                $nf = new File($file['tmp_name']);
                $nf->rename($dir . "/" . $name);
            }
            $f++;
        }
        return redirect("/document/" . $doc->id);
    }

    /**
     * Split a comma separated id list, dropping empty entries.
     *
     * @param mixed $csv Raw list such as "1,2,3".
     * @return array List of trimmed, non-empty id strings.
     */
    private static function split_id_list($csv) {
        $ids = [];
        foreach (explode(",", (string)$csv) as $part) {
            $part = trim($part);
            if ($part !== "") {
                $ids[] = $part;
            }
        }
        return $ids;
    }
}

View File

@@ -0,0 +1,31 @@
<?php
/**
 * JSON endpoints for the current user's download queue.
 *
 * NOTE(review): DownloadJob is used here like a model (find(), save(),
 * no-argument construction); confirm which DownloadJob class this resolves to.
 */
class DownloadController {
    /** Return the DownloadJob constructed from $id. */
    public static function get_download($id) {
        $job = new DownloadJob($id);
        return $job;
    }

    /** Placeholder: intentionally does nothing. */
    public static function start_download() {
    }

    /**
     * List up to 20 unprocessed download jobs owned by the current user,
     * ordered by "queued".
     */
    public static function api_downloads() {
        $filters = [
            ["processed", "=", 0],
            ["owner", "=", get_user()->id],
        ];
        return DownloadJob::find($filters)->orderBy("queued")->limit(20)->all();
    }

    /**
     * Queue a new download for the URL in put("url") and return the
     * refreshed list from api_downloads().
     */
    public static function api_add_download($_request) {
        $target = $_request->put("url");

        $job = new DownloadJob;
        $job->url = $target;
        $job->queued = time();
        // All progress markers start at zero.
        $job->started = 0;
        $job->finished = 0;
        $job->processed = 0;
        $job->owner = get_user()->id;
        // File name built from a random number and the current time, both in hex.
        $job->file = sprintf("download/file-%08X-%08X", rand(), time());
        $job->save();

        return self::api_downloads();
    }
}

View File

@@ -0,0 +1,7 @@
<?php
/** Controller whose single action renders the "index" view. */
class HomeController {
    /**
     * Render the "index" view via blade().
     *
     * @return mixed Whatever blade("index") returns.
     */
    public static function index() {
        $view = blade("index");
        return $view;
    }
}

View File

@@ -0,0 +1,78 @@
<?php
/**
 * Pages and JSON endpoints for the import queue (ProcessJob records that
 * have not been imported yet).
 */
class ImportController {
    /** Render the "downloads" view. */
    public static function downloads() {
        return blade("downloads");
    }

    /** Render the "imports" view. */
    public static function imports() {
        return blade("imports");
    }

    /**
     * List up to 50 jobs with imported = 0, ordered by "queued", calling
     * load("revision") on each.
     */
    public static function api_imports() {
        $is = ProcessJob::find([["imported", "=", 0]])->orderBy("queued")->limit(50)->all();
        foreach ($is as $i) {
            $i->load("revision");
        }
        return $is;
    }

    /** Delete a job when it is valid(), then return the refreshed list. */
    public static function api_delete_import($id) {
        $d = new ProcessJob($id);
        if ($d->valid()) {
            $d->delete();
        }
        return ImportController::api_imports();
    }

    /**
     * Copy every $_POST key onto a valid job and save it.
     *
     * NOTE(review): keys are assigned without a whitelist — confirm this
     * endpoint is only reachable by trusted users.
     */
    public static function api_set_import($_request, $id) {
        $d = new ProcessJob($id);
        if ($d->valid()) {
            foreach ($_POST as $k => $v) {
                $d->$k = $v;
            }
            $d->save();
        }
        return ImportController::api_imports();
    }

    /**
     * Turn an import job into a document revision.
     *
     * The document is looked up by internal_id and created (with its product
     * links) when missing; the job's revision is attached to it and the job
     * is marked as imported.
     *
     * @param object $_request Request object; values are read with put().
     * @param mixed  $id       Import job id.
     * @return mixed The refreshed list from api_imports().
     */
    public static function api_add_document($_request, $id) {
        $job = new ProcessJob($id);
        // Same guard as the sibling endpoints: without it an unknown id would
        // still create a document and a revision.
        if (!$job->valid()) {
            return ImportController::api_imports();
        }

        // Look up with the trimmed value that is stored below. The untrimmed
        // lookup missed existing documents whenever the input carried
        // surrounding whitespace, creating duplicates.
        $internal_id = trim((string)$_request->put('internal_id'));
        $doc = Document::find([["internal_id", "=", $internal_id]])->first();
        if (!$doc) {
            $doc = new Document;
            $doc->internal_id = $internal_id;
            $doc->title = trim($_request->put('title'));
            $doc->subtitle = trim($_request->put('subtitle'));
            $doc->subsubtitle = trim($_request->put('subsubtitle'));
            $doc->overview = trim($_request->put('overview'));
            $doc->owner = get_user()->id;
            $doc->save();

            $prods = explode(",", (string)$_request->put('products'));
            foreach ($prods as $product) {
                $product = trim($product);
                if ($product === "") {
                    // An empty list explodes to [""]; do not create a link
                    // with an empty product id.
                    continue;
                }
                $dp = new DocProduct;
                $dp->document = $doc->id;
                $dp->product = $product;
                $dp->save();
            }
        }

        $job->document = $doc->id;
        $rev = new Revision($job->revision);
        $rev->document = $doc->id;
        $rev->revno = trim($_request->put('revno'));
        $rev->month = $_request->put('month');
        $rev->year = $_request->put('year');
        $rev->owner = get_user()->id;
        $rev->save();

        $job->imported = time();
        $job->save();
        return ImportController::api_imports();
    }
}

View File

@@ -0,0 +1,23 @@
<?php
/** JSON endpoints over the "job" table. */
class JobController {
    /**
     * Return all job rows for a source, ordered by "queued".
     *
     * @param mixed $source Value matched against job.source.
     * @return Collection Rows as returned by DB::nextRecord().
     */
    public static function api_get_jobs($source) {
        $db = DB::getInstance();
        $q = $db->query("select * from job where source=:source order by queued", ["source" => $source]);
        $c = new Collection();
        while ($r = $db->nextRecord($q)) {
            $c->push($r);
        }
        return $c;
    }

    /**
     * Delete a job and return the remaining jobs of the same source.
     *
     * @param mixed $id Job id.
     * @return Collection Remaining jobs; empty when the job does not exist.
     */
    public static function api_delete_job($id) {
        $db = DB::getInstance();
        // Read the row first: its source is needed for the refreshed list.
        $q = $db->query("select * from job where id=:id", ["id" => $id]);
        $r = $db->nextRecord($q);
        if (!$r) {
            // Unknown id: nothing to delete, and dereferencing $r->source
            // would fail.
            return new Collection();
        }
        $db->query("delete from job where id=:id", ["id" => $id]);
        return JobController::api_get_jobs($r->source);
    }
}

View File

@@ -0,0 +1,55 @@
<?php
/** Serves the PDF files and rendered pages of a revision. */
class PDFController {
    /**
     * Return the original or OCR PDF of a revision, for viewing or download.
     *
     * The revision's download counter and last_download timestamp are updated
     * on every call.
     *
     * @param mixed  $id       Revision id.
     * @param string $type     One of "view", "download", "viewocr", "downloadocr".
     * @param string $filename Name passed to fake_filename().
     * @return mixed The PDF object, or a [404, message] array.
     */
    public static function download($id, $type, $filename) {
        $rev = new Revision($id);
        $path = $rev->path();
        $rev->downloads++;
        $rev->last_download = time();
        $rev->save();

        // type => [file below the revision path, call force_download()?]
        $variants = [
            "view"        => ["/doc.pdf", false],
            "download"    => ["/doc.pdf", true],
            "viewocr"     => ["/ocr.pdf", false],
            "downloadocr" => ["/ocr.pdf", true],
        ];

        // Reject unknown types before touching the file object. Previously
        // fake_filename() was called on null ahead of the null check, so an
        // unknown type caused a fatal error instead of this 404.
        if (!is_string($type) || !isset($variants[$type])) {
            return [404, "Not Found 1"];
        }

        [$name, $force] = $variants[$type];
        $file = new PDF($path . $name);
        if ($force) {
            $file->force_download();
        }
        $file->fake_filename($filename);

        if (!$file->exists()) {
            return [404, "Not Found 2"];
        }
        return $file;
    }

    /**
     * Return one page of a revision via Revision::get_page($page, 150).
     *
     * @return mixed The page, or [404, "Not Found"] when get_page() is falsy.
     */
    public static function get_page($id, $page) {
        $rev = new Revision($id);
        $page = $rev->get_page($page, 150);
        if ($page) {
            return $page;
        }
        return [404, "Not Found"];
    }
}

View File

@@ -0,0 +1,161 @@
<?php
/** JSON endpoints for products: lookup, MRU list, tree edits and metadata. */
class ProductController {
    /**
     * Return the product rows for a comma separated id list.
     *
     * @param string $list Ids such as "1,2,3"; unknown ids are skipped.
     * @return Collection Rows as returned by DB::nextRecord().
     */
    public static function api_get_list($list) {
        $db = DB::getInstance();
        $pids = explode(",", $list);
        $out = new Collection();
        foreach ($pids as $pid) {
            $q = $db->query("select * from product where id=:id", ["id" => $pid]);
            if ($r = $db->nextRecord($q)) {
                $out->push($r);
            }
        }
        return $out;
    }

    /** Return the products in the session's "mru" list. */
    public static function api_get_mru() {
        $out = new Collection;
        $mru = Session::get("mru");
        if (!$mru) {
            $mru = [];
        }
        foreach ($mru as $p) {
            $prod = new Product($p);
            // `if ($prod)` was always true for an object; use valid() so ids
            // of products that no longer exist are not returned.
            if ($prod->valid()) {
                $out->push($prod);
            }
        }
        return $out;
    }

    /**
     * Put a product at the front of the session's MRU list (at most five
     * distinct entries) and return the refreshed list.
     */
    public static function api_add_mru($id) {
        $mru = Session::get("mru");
        if (!$mru) {
            $mru = [];
        }
        array_unshift($mru, $id);
        // array_unique() keeps the first occurrence (the new front entry) but
        // leaves gaps in the keys; re-index and keep the first five.
        $mru = array_slice(array_values(array_unique($mru)), 0, 5);
        Session::set("mru", $mru);
        return ProductController::api_get_mru();
    }

    /** Find products whose full_path contains $_POST['search']. */
    public static function api_search() {
        return Product::find([["full_path", "like", "%" . $_POST['search'] . "%"]])->orderBy("full_path")->all();
    }

    /** Create a child product of $id titled put("title"). */
    public static function api_add_child($_request, $id) {
        $p = new Product;
        $p->parent = $id;
        $p->title = $_request->put("title");
        $p->save();
        return $p;
    }

    /**
     * Copy every $_POST key onto the product and save it.
     *
     * NOTE(review): keys are assigned without a whitelist — confirm this
     * endpoint is only reachable by trusted users.
     */
    public static function api_set($id) {
        $p = new Product($id);
        foreach ($_POST as $k => $v) {
            $p->$k = $v;
        }
        $p->save();
        return $p;
    }

    /** Set the product's parent to $_POST['to']. */
    public static function api_move($id) {
        $p = new Product($id);
        $p->parent = $_POST['to'];
        $p->save();
        return $p;
    }

    /**
     * Delete a product: remove its document links, re-parent its children to
     * the product titled "Lost and Found" and return the former parent.
     *
     * @return mixed The parent of the deleted product, or [] when the product
     *               is not valid().
     */
    public static function api_delete($id) {
        $product = new Product($id);
        if ($product->valid()) {
            $laf = Product::find([["title", "=", "Lost and Found"]])->first();
            DB::getInstance()->query("delete from docproduct where product=:pid", ["pid" => $product->id]);
            foreach ($product->children as $c) {
                // NOTE(review): other call sites assign an id to `parent`,
                // while this assigns the object (or the falsy result when no
                // "Lost and Found" product exists) — confirm Model handles both.
                $c->parent = $laf;
                $c->save();
            }
            $product->load("parent");
            $par = $product->parent;
            $product->delete();
            return $par;
        }
        return [];
    }

    /**
     * Delete every document of a product together with the document's
     * revisions and product links.
     *
     * @return array Always an empty array.
     */
    public static function api_empty_trash($id) {
        $prod = new Product($id);
        $docs = $prod->documents;
        foreach ($docs as $doc) {
            $revs = $doc->revisions;
            foreach ($revs as $rev) {
                $rev->delete();
            }
            $dpl = DocProduct::find([["document", "=", $doc->id]])->all();
            foreach ($dpl as $dp) {
                $dp->delete();
            }
            $doc->delete();
        }
        // Every other call site uses cache_invalidate(); this one called
        // invalidate().
        $prod->cache_invalidate("documents");
        return [];
    }

    /**
     * Return the metadata types not present in $prod->meta() as a Collection
     * of {key: type id, value: type name} objects sorted by "value".
     */
    public static function api_available_metadata($id) {
        $prod = new Product($id);
        $exist = $prod->meta();
        $metas = MetaType::find()->all();
        $out = new Collection();
        foreach ($metas as $meta) {
            if (!in_array($meta->id, $exist)) {
                $cl = new stdClass;
                $cl->key = $meta->id;
                $cl->value = $meta->name;
                $out->push($cl);
            }
        }
        $out->sort("value");
        return $out;
    }

    /** Add metadata type put('item_id') to the product; return its meta list. */
    public static function api_add_metadata($_request, $id) {
        $prod = new Product($id);
        $prod->add_meta($_request->put('item_id'));
        return Collection::from_array($prod->meta());
    }

    /**
     * Queue a GeminiJob for every document of a product.
     *
     * @return Collection One ["document" => id, "job" => job id] entry per document.
     */
    public static function api_gemini_all($id) {
        $prod = new Product($id);
        $c = new Collection();
        foreach ($prod->documents as $doc) {
            $job = new GeminiJob($doc->id, "document:" . $doc->id);
            $jobid = $job->queue();
            $c->push(["document" => $doc->id, "job" => $jobid]);
        }
        return $c;
    }
}

View File

@@ -0,0 +1,46 @@
<?php
/** Pages and JSON endpoints operating on a single revision. */
class RevisionController {
    /** Render the "revision" view after calling load("document"). */
    public static function show($id) {
        $rev = new Revision($id);
        $rev->load("document");
        return blade("revision", ["rev" => $rev]);
    }

    /**
     * Copy every $_POST key (trimmed) onto the revision and save it.
     *
     * NOTE(review): keys are assigned without a whitelist — confirm this
     * endpoint is only reachable by trusted users.
     */
    public static function api_set($id) {
        $r = new Revision($id);
        // The former `$r !== false` test was always true (`new` never yields
        // false), so the update has always been unconditional.
        foreach ($_POST as $k => $v) {
            $r->$k = trim($v);
        }
        $r->save();
        return $r;
    }

    /**
     * Delete a revision and redirect to its document page, or go back() when
     * the revision has no document.
     */
    public static function delete($id) {
        $r = new Revision($id);
        $r->load("document");
        $doc = $r->document;
        $r->delete();
        if (!$doc) {
            // There is no document page to go to for an unattached revision.
            return back();
        }
        return redirect("/document/" . $doc->id);
    }

    /**
     * Queue a DownloadJob that writes to the revision's doc.pdf, using the
     * revision's origtitle as the job's source, then redirect to the revision.
     */
    public static function redownload($id) {
        $r = new Revision($id);
        $j = new DownloadJob($r->id, $r->origtitle, $r->path() . "/doc.pdf");
        $jobid = $j->queue();
        flash("success", "Job queued as ID " . $jobid);
        return redirect("/revision/" . $id);
    }

    /** Call purge() on the revision and redirect to its page. */
    public static function purge($id) {
        $r = new Revision($id);
        $r->purge();
        return redirect("/revision/" . $id);
    }
}

View File

@@ -0,0 +1,76 @@
<?php
/** Full-text search over document fields and OCR text. */
class SearchController {
    /**
     * Return up to 10 documents matching $_POST['search'] (boolean-mode
     * full-text match on id, title fields and overview), best match first.
     *
     * @return Collection Document objects.
     */
    static public function api_title_search() {
        $out = new Collection;
        $q = DB::getInstance()->query("
            select
                id, internal_id, title, subtitle, subsubtitle,
                match (internal_id, title, subtitle, subsubtitle, overview)
                against (:search in boolean mode)
                as rel
            from
                document
            where
                match (internal_id, title, subtitle, subsubtitle, overview)
                against (:search in boolean mode)
            order by
                rel desc
            limit
                10
        ", ["search" => $_POST['search'] ?? ""]);
        while ($r = DB::getInstance()->nextRecord($q)) {
            $o = new Document($r->id);
            $out->push($o);
        }
        return $out;
    }

    /**
     * Run (when $_POST['search'] is present) or page through an OCR search.
     *
     * A new search stores the matching revision ids in the session as JSON;
     * every call renders page $page (8 results per page) from that list.
     *
     * @param mixed $page Zero-based page number.
     */
    static public function search($page = 0) {
        if (array_key_exists("search", $_POST)) {
            $q = DB::getInstance()->query("
                select
                    revision.id as id,
                    match(ocr.body) against (:search) as relevance
                from
                    revision,ocr
                where
                    match(ocr.body) against (:search) and
                    ocr.revision = revision.id and
                    not revision.document is null
                order by
                    relevance desc
            ", ["search" => $_POST['search']]);
            $slog = [];
            while ($r = DB::getInstance()->nextRecord($q)) {
                $slog[] = $r->id;
            }
            Session::set("search", json_encode($slog));
        }

        $rpp = 8;
        // A negative page would index before the start of the result list.
        $page = max(0, (int)$page);
        $offset = $page * $rpp;

        // With no search stored in the session json_decode() yields null and
        // count(null) is a TypeError on PHP 8; fall back to an empty list.
        $stored = Session::get("search");
        $slog = $stored ? json_decode($stored) : [];
        if (!is_array($slog)) {
            $slog = [];
        }

        $out = [];
        foreach (array_slice($slog, $offset, $rpp) as $revid) {
            $out[] = new Revision($revid);
        }
        return blade("search", ["page" => $page, "count" => count($slog), "results" => $out, "pages" => ceil(count($slog) / $rpp)]);
    }
}

View File

@@ -0,0 +1,92 @@
<?php
/**
 * Pages and JSON endpoints for reviewing Spider (PDF) and SpiderPage
 * records. Each accept/reject action takes a comma separated id list.
 */
class SpiderController {
    /** Render the "spider_pdfs" view. */
    public static function spider_pdfs() {
        return blade("spider_pdfs");
    }

    /** Render the "spider_pages" view. */
    public static function spider_pages() {
        return blade("spider_pages");
    }

    /** List up to 20 Spider records with status "N", ordered by id. */
    public static function api_pdfs() {
        return Spider::find([["status", "=", "N"]])->orderBy("id")->limit(20)->all();
    }

    /**
     * List up to 20 SpiderPage records with status "O" and a non-empty
     * title, ordered by title.
     */
    public static function api_pages() {
        $filters = [["status", "=", "O"], ["title", "!=", ""]];
        return SpiderPage::find($filters)->orderBy("title")->limit(20)->all();
    }

    /** Set status "B" on each listed Spider record; return the pending list. */
    public static function api_reject_pdf($id = null) {
        if ($id == null) return [];
        foreach (explode(",", $id) as $one) {
            $entry = new Spider($one);
            $entry->status = "B";
            $entry->save();
        }
        return self::api_pdfs();
    }

    /**
     * Set status "D" on each listed Spider record and create a DownloadJob
     * for its URL; return the pending list.
     */
    public static function api_accept_pdf($_request, $id = null) {
        if ($id == null) return [];
        foreach (explode(",", $id) as $one) {
            $entry = new Spider($one);
            $entry->status = "D";
            $entry->save();

            $download = new DownloadJob;
            $download->queued = time();
            // All progress markers start at zero.
            $download->started = 0;
            $download->finished = 0;
            $download->processed = 0;
            $download->url = $entry->url;
            $download->owner = get_user()->id;
            // File name built from a random number and the current time, both in hex.
            $download->file = sprintf("download/file-%08X-%08X", rand(), time());
            $download->save();
        }
        return self::api_pdfs();
    }

    /** Set status "B" on each listed SpiderPage; return the page list. */
    public static function api_reject_page($_request, $id) {
        foreach (explode(",", $id) as $one) {
            $entry = new SpiderPage($one);
            $entry->status = "B";
            $entry->save();
        }
        return self::api_pages();
    }

    /** Set status "N" on each listed SpiderPage; return the page list. */
    public static function api_accept_page($id) {
        foreach (explode(",", $id) as $one) {
            $entry = new SpiderPage($one);
            $entry->status = "N";
            $entry->save();
        }
        return self::api_pages();
    }
}

View File

@@ -0,0 +1,79 @@
<?php
/** System status page and maintenance of IDMatch rules. */
class SystemController {
    /**
     * Render the "status" view with per-status row counts for the pages and
     * spider tables and for the ocr and idx columns of the revision table.
     */
    public static function status() {
        // Status code => label. Several codes share a label (D/Y, W/Q).
        $status = [
            "B" => "Blacklisted",
            "N" => "Pending",
            "F" => "Failed",
            "D" => "Done",
            "Y" => "Done",
            "W" => "Postponed",
            "P" => "Processing",
            "X" => "Deleted",
            "O" => "Off-site",
            "Q" => "Postponed",
            "R" => "Redirect",
        ];
        $spider = self::count_by_label("pages", "status", $status);
        $pdf = self::count_by_label("spider", "status", $status);
        $ocr = self::count_by_label("revision", "ocr", $status);
        $idx = self::count_by_label("revision", "idx", $status);
        return blade("status", ["spider" => $spider, "pdf" => $pdf, "ocr" => $ocr, "idx" => $idx]);
    }

    /**
     * Count the rows of $table per value of $column, keyed by status label.
     *
     * Counts of codes that share a label are added together; previously the
     * later code overwrote the earlier one. Codes without a label are
     * collected under the empty-string key, as before.
     *
     * @param string $table  Table name (internal constant, never user input).
     * @param string $column Column holding the status code.
     * @param array  $labels Status code => label.
     * @return array Label => row count.
     */
    private static function count_by_label($table, $column, $labels) {
        $db = DB::getInstance();
        $q = $db->query("select count(id) as c, $column from $table group by $column order by $column");
        $counts = [];
        while ($r = $db->nextRecord($q)) {
            $label = $labels[(string)$r->$column] ?? "";
            $counts[$label] = ($counts[$label] ?? 0) + (int)$r->c;
        }
        return $counts;
    }

    /** List the IDMatch rules with id >= 0, ordered by weight. */
    public static function api_get_idmatches() {
        return IDMatch::find([["id", ">=", 0]])->orderBy("weight")->all();
    }

    /** Create an IDMatch rule from put() values example, regex and weight. */
    public static function api_add_idmatch($_request) {
        $i = new IDMatch;
        $i->example = $_request->put('example');
        $i->regex = $_request->put('regex');
        $i->weight = $_request->put('weight');
        $i->save();
        return SystemController::api_get_idmatches();
    }

    /** Update an existing IDMatch rule from $_POST; return the rule list. */
    public static function api_set_idmatch($id) {
        $i = new IDMatch($id);
        // `if ($i)` was always true for an object; use valid() so an unknown
        // id does not end up being saved.
        if ($i->valid()) {
            $i->example = $_POST['example'];
            $i->regex = $_POST['regex'];
            $i->weight = $_POST['weight'];
            $i->save();
        }
        return SystemController::api_get_idmatches();
    }

    /** Delete an IDMatch rule when it exists; return the rule list. */
    public static function api_del_idmatch($id) {
        $i = new IDMatch($id);
        if ($i->valid()) {
            $i->delete();
        }
        return SystemController::api_get_idmatches();
    }
}

57
app/jobs/DownloadJob.php Normal file
View File

@@ -0,0 +1,57 @@
<?php
/**
 * Background job that downloads a URL into a file and records the file's
 * SHA-256 on the owning revision.
 */
class DownloadJob extends Job {
    /** Source URL. */
    public $from = null;
    /** Destination file path. */
    public $to = null;
    /** Id of the revision the download belongs to. */
    public $revid = null;
    /** Last percentage reported through status(). */
    private $_pct = 0;

    /**
     * @param mixed  $revid Revision id; the job source becomes "revision:<id>".
     * @param string $from  URL to download.
     * @param string $to    Path the file is written to.
     */
    public function __construct($revid, $from, $to) {
        $this->from = $from;
        $this->to = $to;
        $this->revid = $revid;
        parent::__construct("revision:" . $revid);
    }

    /**
     * Download $from to $to and store the SHA-256 of the result.
     *
     * The data is written to "<to>.part" first and renamed on success, so a
     * failed transfer no longer truncates an existing file. Failures are
     * reported through status() and fail().
     */
    public function run() {
        $this->status("Downloading: 0%");

        $tmp = $this->to . ".part";
        $fd = fopen($tmp, "w");
        if ($fd === false) {
            $this->status("Unable to open " . $tmp . " for writing");
            $this->fail();
            return;
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->from);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36");
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
        curl_setopt($ch, CURLOPT_PRIVATE, $this);
        curl_setopt($ch, CURLOPT_TIMEOUT, 3600);
        curl_setopt($ch, CURLOPT_FILETIME, true);
        curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, [$this, 'download_progress']);
        curl_setopt($ch, CURLOPT_NOPROGRESS, false);
        curl_setopt($ch, CURLOPT_FILE, $fd);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // Treat HTTP errors (>= 400) as failures instead of saving the error
        // page as the document.
        curl_setopt($ch, CURLOPT_FAILONERROR, true);

        print("Running...\n");
        $ok = curl_exec($ch);
        $error = curl_error($ch);
        fclose($fd);

        if ($ok === false) {
            if (file_exists($tmp)) {
                unlink($tmp);
            }
            $this->status("Download failed: " . $error);
            $this->fail();
            return;
        }
        if (!rename($tmp, $this->to)) {
            $this->status("Unable to move download to " . $this->to);
            $this->fail();
            return;
        }

        $sha = hash_file("sha256", $this->to);
        $rev = new Revision($this->revid);
        $rev->sha256 = $sha;
        $rev->save();
        print("Finished\n");
    }

    /**
     * cURL progress callback: report the percentage through status()
     * whenever it changes.
     *
     * @return int Always 0 so the transfer continues.
     */
    function download_progress($ch, $download_size, $downloaded, $upload_size, $uploaded) {
        if ($download_size == 0) return 0; // total size not known (yet)
        $pct = round($downloaded / $download_size * 100);
        if ($pct != $this->_pct) {
            $this->status("Downloading: " . $pct . "%");
            $this->_pct = $pct;
        }
        return 0;
    }
}

214
app/jobs/GeminiJob.php Normal file
View File

@@ -0,0 +1,214 @@
<?php
/**
 * Background job that sends a document's smallest PDF to Gemini and stores
 * the returned title, document id and overview on the document.
 */
class GeminiJob extends Job {
    /** Id of the document being processed. */
    public $docid;

    /**
     * @param mixed  $docid  Document id.
     * @param string $source Job source label passed to Job.
     */
    public function __construct($docid, $source="unknown") {
        parent::__construct($source);
        $this->docid = $docid;
    }

    /**
     * Run the job.
     *
     * Expected reply format, as parsed below: an optional first line
     * "{Title}", an optional following line "[document id]", then the
     * overview text.
     */
    public function run() {
        $pdf = false;
        // Known subtitles, checked in this order against the end of a title
        // that does not already contain a ":" separator.
        $subs = [
            "Installation and Operating Information",
            "Installation and Configuration",
            "Installing and Getting Started",
            "Installation/Operator's Manual",
            "Installation/Operator's Guide",
            "Programmer's Reference Guide",
            "Field Maintenance Print Set",
            "Illustrated Parts Breakdown",
            "Installation/Owner's Guide",
            "Installation Information",
            "Installation/User Guide",
            "User Documentation Kit",
            "Programmer Information",
            "Technical Description",
            "Operator Information",
            "Configuration Guide",
            "Upgrade Information",
            "Installation Manual",
            "Service Information",
            "Installation Guide",
            "Programming Manual",
            "Maintenance Manual",
            "Maintenance Guide",
            "Technical Summary",
            "Operator's Guide",
            "System Reference",
            "User Information",
            "Technical Manual",
            "Language Manual",
            "Service Manual",
            "Service Guide",
            "Read Me First",
            "Owner's Guide",
            "Release Notes",
            "Options Guide",
            "Users' Manual",
            "User's Manual",
            "HiTest Notes",
            "User's Guide",
            "Design Guide",
            "User Manual",
            "User Guide",
        ];

        $doc = new Document($this->docid);

        // Pick the smallest PDF among the document's revisions.
        $minsize = PHP_INT_MAX;
        foreach ($doc->revisions as $r) {
            $p = new PDF($r->path() . "/doc.pdf");
            if ($p->size() < $minsize) {
                $minsize = $p->size();
                $pdf = $p;
            }
        }
        if (!$pdf) {
            $this->fail();
            return;
        }

        $this->status("Processing document with Gemini");
        try {
            $gemini = new Gemini();
            $gemini->upload_callback([$this, "uploadcb"]);
            $gemini->process_callback([$this, "processcb"]);
            $lines = $gemini->geminiOverview($pdf);
        } catch (Exception $e) {
            $message = $e->getMessage();
            $this->status($message);
            // These two messages are retried; anything else fails the job.
            if ($message == "The request timed out. Please try again.") {
                $this->retry();
            } else if ($message == "HTTP Error 0 requesting AI assistance") {
                $this->retry();
            } else {
                $this->fail();
            }
            return;
        }

        $this->status("Postprocessing returned data");
        $ld = explode("\n", $lines);

        if (preg_match('/^\{(.*)\}$/', trim($ld[0] ?? ""), $m)) {
            $title = trim($m[1]);
            // The search literal here was a non-ASCII character that is not
            // legible in the source (an empty search makes str_replace a
            // no-op). NOTE(review): presumably typographic apostrophes —
            // confirm.
            $title = str_replace(["\u{2018}", "\u{2019}"], "'", $title);
            $newsub = "";
            $newsubsub = "";
            // "Title: Subtitle: rest" is split on ":".
            $b = explode(":", $title);
            if (count($b) > 1) {
                $title = trim(array_shift($b));
                $newsub = trim(array_shift($b));
                $newsubsub = implode(": ", $b);
            }
            if ($newsub == "") {
                foreach ($subs as $sub) {
                    if (str_ends_with($title, $sub)) {
                        $newsub = $sub;
                        // Cut the subtitle plus the one character before it.
                        $title = substr($title, 0, 0 - (strlen($sub) + 1));
                        break;
                    }
                }
            }
            $doc->title = $title;
            $doc->subtitle = $newsub;
            $doc->subsubtitle = $newsubsub;
            array_shift($ld);
            // $ld may now be empty (single-line reply), hence the ?? guards
            // here and below.
            if (trim($ld[0] ?? "") == "") {
                array_shift($ld);
            }
        }

        if (preg_match('/^\[(.*)\]$/', trim($ld[0] ?? ""), $m)) {
            $iid = IDMatch::find_docid($this->cleanup($m[1]));
            if ($iid) {
                $doc->internal_id = $iid[0];
            }
            array_shift($ld);
        }

        $doc->overview = implode("\n", $ld);
        $doc->save();

        /*
        $db = DB::getInstance();
        $q = $db->query("
            select
                product.id as id,
                locate(product.title, document.title) as wordoffset
            from
                product,
                document
            where
                locate(product.title, document.title) > 0 and
                document.id = :id and
                length(product.title) > 3
            order by
                length(product.title) desc
            limit 0,1
        ", ["id" => $doc->id]);
        if ($r = $db->nextRecord($q)) {
            $d = new DocProduct();
            $d->document = $doc->id;
            $d->product = $r->id;
            $d->save();
        } else {
            $q = $db->query("
                select
                    product.id as id,
                    locate(product.title, document.overview) as wordoffset
                from
                    product,
                    document
                where
                    locate(product.title, document.overview) > 0 and
                    locate(product.title, document.overview) < 300 and
                    document.id = :id and
                    length(product.title) > 3
                order by
                    length(product.title) desc
                limit 0,1
            ", ["id" => $doc->id]);
            if ($r = $db->nextRecord($q)) {
                $d = new DocProduct();
                $d->document = $doc->id;
                $d->product = $r->id;
                $d->save();
            }
        }
        */
        $this->status("Finished");
    }

    /** Upload progress callback: report the percentage through status(). */
    public function uploadcb($percent) {
        $this->status("Uploading: " . $percent . "% complete");
    }

    /** Processing callback: forward the message to status(). */
    public function processcb($message) {
        $this->status($message);
    }

    /**
     * Normalise a document id candidate before matching: "Ø" becomes "0" and
     * "." becomes a space.
     */
    function cleanup($txt) {
        $txt = str_replace("Ø", "0", $txt);
        $txt = str_replace(".", " ", $txt);
        return $txt;
    }
}

15
app/jobs/ProcessJob.php Normal file
View File

@@ -0,0 +1,15 @@
<?php
/** Job tied to a document; its run() step is currently empty. */
class ProcessJob extends Job {
    /** Id of the document this job refers to. */
    public $docid = 0;

    /**
     * @param mixed $docid Document id; the job source becomes "document:<id>".
     */
    public function __construct($docid) {
        $source = "document:" . $docid;
        parent::__construct($source);
        $this->docid = $docid;
    }

    /** No work is performed here. */
    public function run() {
    }
}

13
app/models/DocMeta.php Normal file
View File

@@ -0,0 +1,13 @@
<?php
/**
 * One metadata value attached to a document. Elsewhere in the application
 * the fields document, metadata and data are read and written on it.
 */
class DocMeta extends Model {
// Field name => class name of the object the field refers to.
// NOTE(review): presumably consumed by Model when resolving related
// objects (e.g. load()) — confirm against Model.
protected $_classes = [
"document" => "Document",
"metadata" => "MetaType"
];
}

17
app/models/DocProduct.php Normal file
View File

@@ -0,0 +1,17 @@
<?php
/**
 * Link record between a document and a product (one row per pairing).
 */
class DocProduct extends Model {
// Field name => class name of the object the field refers to.
// NOTE(review): presumably consumed by Model when resolving related
// objects (e.g. load("product")) — confirm against Model.
protected $_classes = [
"document" => "Document",
"product" => "Product"
];
// Field declarations: name => [MODEL_* type constant, optional argument].
// NOTE(review): the 11 on the BIGINT fields looks like a column
// length/width and the string on MODEL_OBJECT like the target class —
// confirm against Model.
protected $_model = [
"id" => [MODEL_SERIAL],
"created" => [MODEL_BIGINT, 11],
"updated" => [MODEL_BIGINT, 11],
"document" => [MODEL_OBJECT, "Document"],
"product" => [MODEL_OBJECT, "Product"]
];
}

191
app/models/Document.php Normal file
View File

@@ -0,0 +1,191 @@
<?php
class Document extends Model {
protected $_computed = [
"related" => "get_related",
"revisions" => "get_revisions",
"products" => "get_products",
"metadata" => "get_metadata",
"attachments" => "get_attachments",
];
public function get_revisions() {
return Revision::find([["document", "=", $this->id]])->orderBy("revno")->all();
}
public function get_related() {
$revs = Revision::find([["document", "=", $this->id]])->all();
$related = new Collection;
foreach ($revs as $rev) {
$q = DB::getInstance()->query("
select
id,
internal_id,
title,
subtitle,
subsubtitle
from
document
where
internal_id in (
select
distinct words.word
from
words,
revwords
where
words.id=revwords.word
and
revwords.revision=:rev
and
words.word like '%-%-%'
)", array("rev" => $rev->id));
while ($r = DB::getInstance()->nextRecord($q)) {
if ($r->id != $this->id) {
$related->push(new Document($r->id));
}
}
}
return $related;
}
public function overview_md() {
return \Michelf\Markdown::defaultTransform($this->overview);
}
public function on_delete() {
DB::getInstance()->query("delete from docproduct where document=:id", ["id" => $this->id]);
}
private $_products = null;
public function get_products() {
if ($this->_products == null) {
$this->_products = new Collection;
$dpl = DocProduct::find([["document", "=", $this->id]])->all();
foreach ($dpl as $dp) {
$dp->load("product");
$this->_products->push($dp->product);
}
}
return $this->_products;
}
public function duplicate() {
$newdoc = new Document();
$newdoc->title = $this->title;
$newdoc->subtitle = $this->subtitle;
$newdoc->subsubtitle = $this->subsubtitle;
$newdoc->overview = $this->overview;
$newdoc->internal_id = $this->internal_id;
$newdoc->owner = $this->owner;
$newdoc->year = $this->year;
$newdoc->month = $this->month;
$newdoc->save();
$dpl = DocProduct::find([["document", "=", $this->id]])->all();
foreach ($dpl as $dp) {
$ndp = new DocProduct();
$ndp->document = $newdoc->id;
$ndp->product = $dp->product;
$ndp->save();
}
return $newdoc;
}
public function remove_product($id) {
$dp = DocProduct::find([["document", "=", $this->id], ["product", "=", $id]])->first();
if ($dp) {
$dp->delete();
}
}
public function set_metadata($metadata, $value) {
$m = DocMeta::find([["document", "=", $this->id], ["metadata", "=", $metadata]])->first();
if (!$m) {
$m = new DocMeta();
$m->document = $this->id;
$m->metadata = $metadata;
}
$m->data = $value;
$m->save();
return $m->id;
}
public function get_metadata() {
return DocMeta::find([["document", "=", $this->id]])->all();
}
public function remove_metadata($metadata) {
$m = DocMeta::find([["document", "=", $this->id], ["metadata", "=", $metadata]])->first();
if ($m) {
$m->delete();
}
}
public function get_metadata_by_id($metadata) {
$m = DocMeta::find([["document", "=", $this->id], ["metadata", "=", $metadata]])->first();
if (!$m) {
return "";
}
return $m->data;
}
public function guess_docid() {
$text = $this->title . " " . $this->subtitle . " " . $this->subsubtitle;
$text .= $this->overview;
$words = $this->get_words($text);
return IDMatch::find_docid($words)[0];
}
/**
 * Split free text into whitespace-separated word tokens.
 *
 * A fixed, ordered list of literal substitutions is applied first; then any
 * run of characters other than ASCII letters, digits and "-" becomes a single
 * space, and the result is split on whitespace. Leading or trailing
 * separators therefore produce empty strings at the ends of the result.
 *
 * @param string $text Raw text.
 * @return array List of tokens as produced by preg_split().
 */
function get_words($text) {
    // Order matters: each pair is applied to the output of the previous one.
    $substitutions = [
        ["\r", " "],
        // Four passes of double-newline collapsing, as in the original.
        ["\n\n", "\n"],
        ["\n\n", "\n"],
        ["\n\n", "\n"],
        ["\n\n", "\n"],
        ["\n", " "],
        ["\"", ""],
        [",", " "],
        // NOTE(review): the search string here is empty, which makes this
        // substitution a no-op. Presumably it once held a non-ASCII dash
        // character - confirm against the original file.
        ["", "-"],
        ["~", "-"],
        ["--", "-"],
    ];
    foreach ($substitutions as [$needle, $replacement]) {
        $text = str_replace($needle, $replacement, $text);
    }
    $text = preg_replace('/([^a-zA-Z\-0-9]+)/', ' ', $text);
    return preg_split('/\s+/', $text);
}
/**
 * List the attachment files stored for this document.
 *
 * Reads the directory ROOT/attachments/<id> and wraps every entry whose name
 * does not start with "." in a File object.
 *
 * @return Collection Collection of File objects; empty when the directory is
 *                    missing, is not a directory, or cannot be opened.
 */
function get_attachments() {
    $ap = ROOT . "/attachments/" . $this->id;
    $files = new Collection;
    // is_dir() is also false for a path that does not exist.
    if (!is_dir($ap)) {
        return $files;
    }
    $dir = opendir($ap);
    if ($dir === false) {
        return $files;
    }
    // Compare against false explicitly: a file literally named "0" is falsy
    // and would otherwise end the listing early.
    while (($f = readdir($dir)) !== false) {
        if (substr($f, 0, 1) == ".") continue;
        $files->push(new File($ap . "/" . $f));
    }
    closedir($dir);
    return $files;
}
}

54
app/models/IDMatch.php Normal file
View File

@@ -0,0 +1,54 @@
<?php
/**
 * Weighted regular-expression patterns used to recognise document ids in text.
 *
 * Each IDMatch row supplies a `regex` (anchored with ^...$ when used) and a
 * `weight` that the lookup orders by.
 */
class IDMatch extends Model {
    /**
     * Find the first word that matches one of the stored patterns.
     *
     * Patterns are tried in `weight` order; for each pattern every word is
     * tested in turn. Before matching, a word is upper-cased, "(" is replaced
     * by "C", "=" by "-", and "--" by "-".
     *
     * @param array|string $words List of candidate words, or raw text that is
     *                            first split with get_words().
     * @return array|false [capture group 1, capture group 2 or null] of the
     *                     first match, or false when nothing matches.
     */
    public static function find_docid($words) {
        if (!is_array($words)) {
            $words = self::get_words($words);
        }
        $matches = IDMatch::find()->orderBy("weight")->all();

        // Normalise each word once instead of once per pattern; the result
        // does not depend on the pattern being tried.
        $candidates = [];
        foreach ($words as $word) {
            $word = strtoupper($word);
            $word = str_replace("(", "C", $word);
            $word = str_replace("=", "-", $word);
            $word = str_replace("--", "-", $word);
            $candidates[] = $word;
        }

        foreach ($matches as $match) {
            $preg = '/^' . $match->regex . '$/';
            foreach ($candidates as $candidate) {
                if (preg_match($preg, $candidate, $m)) {
                    // Missing groups yield null rather than relying on the
                    // @ error-suppression operator.
                    return [$m[1] ?? null, $m[2] ?? null];
                }
            }
        }
        return false;
    }

    /**
     * Split free text into whitespace-separated word tokens.
     *
     * Applies a fixed sequence of literal substitutions, turns every run of
     * characters other than ASCII letters, digits and "-" into one space, and
     * splits on whitespace. Leading or trailing separators produce empty
     * strings at the ends of the result.
     *
     * @param string $text Raw text.
     * @return array List of tokens as produced by preg_split().
     */
    public static function get_words($text) {
        // The debug print() calls that echoed the text before and after
        // cleaning were removed: they wrote into whatever output the caller
        // was producing.
        $text = str_replace("\r", " ", $text);
        $text = str_replace("\n\n", "\n", $text);
        $text = str_replace("\n\n", "\n", $text);
        $text = str_replace("\n\n", "\n", $text);
        $text = str_replace("\n\n", "\n", $text);
        $text = str_replace("\n", " ", $text);
        $text = str_replace("\"", "", $text);
        $text = str_replace(",", " ", $text);
        // NOTE(review): the next two calls have an empty search string, which
        // makes them no-ops. Presumably they once held non-ASCII dash
        // characters - confirm against the original file.
        $text = str_replace("", "-", $text);
        $text = str_replace("", "-", $text);
        $text = str_replace("~", "-", $text);
        $text = str_replace("--", "-", $text);
        $text = preg_replace('/([^a-zA-Z\-0-9]+)/', ' ', $text);
        $words = preg_split('/\s+/', $text);
        return $words;
    }
}

11
app/models/MetaType.php Normal file
View File

@@ -0,0 +1,11 @@
<?php
/**
 * Model for a metadata type, declared with the table name "metatypes".
 */
class MetaType extends Model {
    protected $table = "metatypes";

    /**
     * Look up the `name` of the metadata type with the given id.
     *
     * @param mixed $id Id passed to the MetaType constructor.
     * @return mixed The `name` property of that MetaType.
     */
    public static function name($id) {
        return (new MetaType($id))->name;
    }
}

4
app/models/OCR.php Normal file
View File

@@ -0,0 +1,4 @@
<?php
/**
 * Model subclass that declares no members of its own; everything it offers
 * comes from Model.
 *
 * Revision::get_body() looks up an OCR row by its `revision` field and reads
 * the row's `body`.
 */
class OCR extends Model {
}

163
app/models/Product.php Normal file
View File

@@ -0,0 +1,163 @@
<?php
/**
 * A product in a parent/child hierarchy, linked to documents through
 * DocProduct rows.
 *
 * Besides tree navigation it maintains `full_path` (the " / "-joined titles
 * from the root down to this product) and a comma-separated `metadata` list
 * of metadata ids.
 */
class Product extends Model {
    // NOTE(review): the three maps below are not read anywhere in this class;
    // presumably the Model base class consumes them (relation classes,
    // computed attributes, change triggers) - confirm against Model.
    protected $_classes = [
        "parent" => "Product"
    ];
    protected $_computed = [
        "documents" => "get_documents",
        "children" => "get_children",
    ];
    protected $_triggers = [
        "parent" => "update_path",
        "title" => "update_path",
    ];

    private $_children = null;

    /**
     * Get the direct children of this product, ordered by title.
     *
     * A truthy value under the cache key "children" is returned as-is;
     * otherwise the children are queried once per instance, stored in the
     * cache and returned.
     *
     * @return mixed Result of the Product query's `->all()`.
     */
    public function get_children() {
        $c = $this->cache_get("children");
        if ($c) return $c;
        if ($this->_children == null) {
            $this->_children = Product::find([["parent", "=", $this->id]])->orderBy("title")->all();
        }
        $this->cache_set("children", $this->_children);
        return $this->_children;
    }

    /**
     * Build the " / "-separated title path from the root product to this one.
     *
     * @return string Titles of every node returned by get_tree(), joined.
     */
    public function get_full_title() {
        $tree = $this->get_tree();
        $n = [];
        foreach ($tree as $t) {
            $n[] = $t->title;
        }
        $out = implode(" / ", $n);
        return $out;
    }

    /**
     * Recompute and save `full_path` for this product and all descendants.
     *
     * @param string|null $ppath Full path of the parent. When null (or any
     *                           value loosely equal to null) the path is
     *                           rebuilt from the tree instead.
     */
    public function update_path($ppath = null) {
        if ($ppath == null) {
            $this->full_path = $this->get_full_title();
        } else {
            $this->full_path = $ppath . " / " . $this->title;
        }
        // Collapse the doubled separator left behind by an empty title.
        $this->full_path = str_replace("/ / ", "/ ", $this->full_path);
        $this->save();
        foreach ($this->get_children() as $child) {
            // Was `$this->fill_path` (typo): children never received the
            // parent path and each rebuilt its own path from the tree.
            $child->update_path($this->full_path);
        }
    }

    /**
     * Get the chain of products from the root down to this product.
     *
     * @return array Ancestors first, this product last.
     */
    public function get_tree() {
        $out = [];
        if ($this->load("parent")) {
            $p = $this->parent;
            $out = $p->get_tree();
        }
        array_push($out, $this);
        return $out;
    }

    /**
     * Render the `overview` field through Michelf Markdown.
     *
     * @return string Output of \Michelf\Markdown::defaultTransform().
     */
    public function overview_md() {
        return \Michelf\Markdown::defaultTransform($this->overview);
    }

    /**
     * Remove every docproduct row that references this product.
     *
     * NOTE(review): presumably invoked by Model when the product is deleted -
     * confirm against Model.
     */
    public function on_delete() {
        DB::getInstance()->query("delete from docproduct where product=:id", ["id" => $this->id]);
    }

    private $_documents = null;

    /**
     * Get the documents linked to this product.
     *
     * A truthy value under the cache key "documents" is returned as-is.
     * Otherwise the DocProduct rows for this product are read, each row's
     * document is loaded and collected, and the collection is sorted by
     * subsubtitle, then subtitle, then title.
     *
     * @return Collection Collection of the linked documents.
     */
    public function get_documents() {
        $d = $this->cache_get("documents");
        if ($d) return $d;
        if ($this->_documents == null) {
            $dpl = DocProduct::find([["product", "=", $this->id]])->all();
            $this->_documents = new Collection;
            foreach ($dpl as $dp) {
                if ($dp->load("document")) {
                    $this->_documents->push($dp->document);
                }
            }
            // NOTE(review): sorting least-significant key first only gives a
            // title/subtitle/subsubtitle ordering if Collection::sort is
            // stable - confirm.
            $this->_documents->sort("subsubtitle", true);
            $this->_documents->sort("subtitle", true);
            $this->_documents->sort("title", true);
        }
        $this->cache_set("documents", $this->_documents);
        return $this->_documents;
    }

    /**
     * Link a document to this product by saving a new DocProduct row.
     *
     * @param mixed $doc Object whose `id` is stored as the row's document.
     */
    public function add_document($doc) {
        $dp = new DocProduct;
        $dp->product = $this->id;
        $dp->document = $doc->id;
        $dp->save();
    }

    /**
     * Get the metadata ids configured on this product.
     *
     * @return array Pieces of the comma-separated `metadata` field, or an
     *               empty array when the field is loosely equal to null.
     */
    public function meta() {
        if ($this->metadata == null) {
            return [];
        }
        return explode(",", $this->metadata);
    }

    /**
     * Add a metadata id to this product if it is not already listed.
     *
     * @param mixed $id   Metadata id to add.
     * @param bool  $save Whether to save the product afterwards.
     */
    public function add_meta($id, $save = true) {
        $m = $this->meta();
        // Loose comparison on purpose: stored ids are strings from explode(),
        // while callers may pass integers.
        if (!in_array($id, $m)) {
            $m[] = $id;
        }
        $this->metadata = implode(",", $m);
        if ($save) $this->save();
    }

    /**
     * Remove a metadata id from this product.
     *
     * @param mixed $id   Metadata id to remove.
     * @param bool  $save Whether to save the product afterwards.
     */
    public function del_meta($id, $save = true) {
        $m = $this->meta();
        $o = [];
        foreach ($m as $v) {
            if ($v != $id) {
                $o[] = $v;
            }
        }
        // Was `$this->metadaya` (typo): the filtered list was written to a
        // stray property and `metadata` was never changed.
        $this->metadata = implode(",", $o);
        if ($save) $this->save();
    }

    /**
     * Get this product's documents sorted by its configured metadata fields.
     *
     * Metadata ids are applied from last to first, each as a separate sort on
     * the value returned by the document's get_metadata_by_id().
     *
     * @return Collection The collection from get_documents(), sorted in place.
     */
    public function documents_sorted_by_meta() {
        $meta = $this->meta();
        $docs = $this->get_documents();
        while (count($meta) > 0) {
            $mid = array_pop($meta);
            $docs->sort_with_function(function($a, $b) use ($mid) {
                return $a->get_metadata_by_id($mid) <=> $b->get_metadata_by_id($mid);
            });
        }
        return $docs;
    }
}

114
app/models/Revision.php Normal file
View File

@@ -0,0 +1,114 @@
<?php
/**
 * One revision of a document, with its PDF and derived images stored in a
 * per-revision directory under ROOT/pdf.
 */
class Revision extends Model {
    // NOTE(review): these maps are not read anywhere in this class; presumably
    // the Model base class consumes them (relation classes, computed
    // attributes, field transforms) - confirm against Model.
    protected $_classes = [
        "document" => "Document"
    ];
    protected $_computed = [
        "body" => "get_body",
    ];
    protected $transform = [
        "info" => "json"
    ];

    /**
     * Get the storage directory of this revision.
     *
     * The id is zero-padded to 11 digits and split into groups of 2, 3, 3 and
     * 3 digits, giving ROOT/pdf/NN/NNN/NNN/NNN.
     *
     * @return string Directory path without a trailing slash.
     */
    function path() {
        $rs = sprintf("%011d", $this->id);
        $d1 = substr($rs, 0, 2);
        $d2 = substr($rs, 2, 3);
        $d3 = substr($rs, 5, 3);
        $d4 = substr($rs, 8, 3);
        $p = sprintf("%s/pdf/%s/%s/%s/%s", ROOT, $d1, $d2, $d3, $d4);
        return $p;
    }

    /**
     * Build a download file name for this revision.
     *
     * With a document: "<internal_id>[-<revno>] <title> <subtitle>
     * <subsubtitle>", where the revno part is left out when it is "" or "0".
     * Without one: "doc". The result is trimmed, gets a ".pdf" suffix, and
     * has spaces and "/" replaced by "_".
     *
     * @return string File name.
     */
    function filename() {
        if ($this->document) {
            $out = $this->document->internal_id;
            if (($this->revno != "") && ($this->revno != "0")) {
                $out .= "-";
                $out .= $this->revno;
            }
            $out .= " ";
            $out .= $this->document->title . " " . $this->document->subtitle . " " . $this->document->subsubtitle;
        } else {
            $out = "doc";
        }
        $out = trim($out);
        $out.= ".pdf";
        $out = str_replace(" ", "_", $out);
        $out = str_replace("/", "_", $out);
        return $out;
    }

    /**
     * Extract page 0 of doc.pdf into cover.jpg in the revision directory.
     */
    function create_cover() {
        $p = $this->path();
        $pdf = new PDF($p . "/doc.pdf");
        $cover = $p . "/cover.jpg";
        $pdf->extract_page(0, $cover);
    }

    /**
     * Get the URL path of this revision's cover image.
     *
     * Calls create_cover() first when cover.jpg does not exist yet.
     *
     * @param mixed $size Optional size segment for the URL; left out when the
     *                    value is loosely equal to null.
     * @return string "/cover/<id>/cover.jpg" or "/cover/<id>/<size>/cover.jpg".
     */
    function cover($size = null) {
        $f = new File($this->path() . "/cover.jpg");
        if (!$f->exists()) {
            $this->create_cover();
        }
        if ($size == null) {
            return "/cover/" . $this->id . "/cover.jpg";
        }
        return "/cover/" . $this->id . "/" . $size . "/cover.jpg";
    }

    private $_body = null;

    /**
     * Get the OCR body text of this revision.
     *
     * The first OCR row whose `revision` equals this id is looked up and its
     * `body` is remembered on the instance.
     *
     * @return mixed The OCR body, or "" when no OCR row exists.
     */
    public function get_body() {
        if ($this->_body == null) {
            $ocr = OCR::find([["revision", "=", $this->id]])->first();
            if (!$ocr) {
                return "";
            }
            $this->_body = $ocr->body;
        }
        return $this->_body;
    }

    /**
     * Get a rendered page image, extracting it from the PDF when needed.
     *
     * Images live at <path>/pages/<page, 4 digits>-<dpi>.jpg. An existing
     * file is wrapped in an Image; otherwise the pages directory is created
     * if missing and the page is extracted from doc.pdf.
     *
     * @param int $page Page number passed to PDF::extract_page().
     * @param int $dpi  Resolution passed to PDF::extract_page().
     * @return mixed An Image for an existing file, the truthy result of
     *               extract_page(), or null when extraction yields nothing.
     */
    public function get_page($page, $dpi = 300) {
        $file = new File(sprintf("%s/pages/%04d-%d.jpg", $this->path(), $page, $dpi));
        if ($file->exists()) {
            return new Image($file);
        }
        $dir = sprintf("%s/pages", $this->path());
        if (!file_exists($dir)) {
            mkdir($dir, 0777);
        }
        $pdf = new PDF(sprintf("%s/doc.pdf", $this->path()));
        $img = $pdf->extract_page($page, $file->path(), $dpi);
        if ($img) {
            return $img;
        }
        return null;
    }

    /**
     * Delete the generated .jpg files of this revision.
     *
     * Removes every file ending in ".jpg" from the revision directory and
     * from its "pages" subdirectory. A directory that is missing or cannot
     * be opened is skipped.
     */
    public function purge() {
        $base = $this->path();
        foreach ([$base, $base . "/pages"] as $target) {
            // The pages directory only exists once get_page() has created it;
            // opendir() on a missing path returns false, which readdir() and
            // closedir() cannot accept.
            if (!is_dir($target)) {
                continue;
            }
            $dir = opendir($target);
            if ($dir === false) {
                continue;
            }
            // Compare against false explicitly so an entry named "0" does not
            // end the loop early.
            while (($file = readdir($dir)) !== false) {
                if (str_ends_with($file, ".jpg")) {
                    unlink($target . "/" . $file);
                }
            }
            closedir($dir);
        }
    }
}

4
app/models/Spider.php Normal file
View File

@@ -0,0 +1,4 @@
<?php
/**
 * Model subclass that declares no members of its own; everything it offers
 * comes from Model.
 */
class Spider extends Model {
}

View File

@@ -0,0 +1,4 @@
<?php
/**
 * Model subclass that declares no members of its own; everything it offers
 * comes from Model.
 */
class SpiderBanned extends Model {
}

4
app/models/SpiderDom.php Normal file
View File

@@ -0,0 +1,4 @@
<?php
/**
 * Model subclass that declares no members of its own; everything it offers
 * comes from Model.
 */
class SpiderDom extends Model {
}

View File

@@ -0,0 +1,5 @@
<?php
/**
 * Model subclass whose only own member is a `$table` override.
 */
class SpiderPage extends Model {
// Presumably the name of the backing table read by Model - confirm against Model.
protected $table="pages";
}

5
app/models/User.php Normal file
View File

@@ -0,0 +1,5 @@
<?php
/**
 * Model subclass whose only own member is a `$table` declaration.
 */
class User extends Model {
// NOTE(review): declared `public static`, whereas MetaType and SpiderPage
// declare `protected $table` as an instance property. Presumably this is the
// backing table name read by Model; confirm that Model picks up a static
// property here, or align the declaration with the other models.
public static $table = "users";
}