<?php

declare(strict_types=1);

namespace App\Application\Services;

/**
 * Produces placeholder blog articles from Markov chains trained on bundled text files.
 *
 * Transition tables are built from fixed-width byte n-grams of the source file, memoised
 * per instance, and persisted best-effort under var/cache/markov so that later requests
 * can skip the rebuild while the source file's mtime is unchanged.
 */
class BlogProvider
{
    /** Reserved table key that holds the shared id => token list; it is not a chain state. */
    private const TOKENS_KEY = '_tokens';

    /**
     * Explicit htmlspecialchars() flags. They equal the PHP 8.1+ default; spelling them out
     * keeps older versions from returning an empty string when a byte-wise n-gram has split
     * a multi-byte character.
     */
    private const ESCAPE_FLAGS = ENT_QUOTES | ENT_SUBSTITUTE;

    /** @var array<string, array> tables already loaded by this instance, keyed by md5(path|look_forward) */
    private array $tableCache = [];

    private string $cacheDir;

    public function __construct()
    {
        $this->cacheDir = dirname(__DIR__, 3) . '/var/cache/markov';
        if (!is_dir($this->cacheDir)) {
            // Best-effort: if this fails only the on-disk cache is lost, tables are rebuilt per request.
            @mkdir($this->cacheDir, 0775, true);
        }
    }

    /**
     * Builds the weighted transition table for $text.
     *
     * Every $look_forward-byte window is a state; the (up to) $look_forward bytes that follow
     * it are the token that state transitions to. Windows are byte-based (substr), so multi-byte
     * characters may be split across tokens.
     *
     * @return array state => ['total' => int, 'cum' => [['id' => int, 'cum' => int], ...]],
     *               plus self::TOKENS_KEY => list of token strings indexed by id
     *
     * @throws \InvalidArgumentException when $look_forward is smaller than 1
     */
    private function generate_markov_table(string $text, int $look_forward = 4): array
    {
        if ($look_forward < 1) {
            throw new \InvalidArgumentException('look_forward must be at least 1');
        }

        // Pass 1: count how often each token follows each state.
        $table = [];
        $max = strlen($text) - $look_forward;
        for ($i = 0; $i < $max; $i++) {
            $key = substr($text, $i, $look_forward);
            $next = substr($text, $i + $look_forward, $look_forward);
            $table[$key][$next] = ($table[$key][$next] ?? 0) + 1;
        }

        // Pass 2: replace the raw counts (in place, to limit peak memory) with cumulative
        // weights, and map token strings to integer ids backed by one shared token list.
        $tokenIndex = [];
        $tokenList = [];
        foreach ($table as $key => $counts) {
            $cum = [];
            $sum = 0;
            foreach ($counts as $token => $weight) {
                if (!isset($tokenIndex[$token])) {
                    $tokenIndex[$token] = count($tokenList);
                    // PHP turns integer-like keys such as "1234" into ints; store the string form.
                    $tokenList[] = (string) $token;
                }
                $sum += $weight;
                $cum[] = ['id' => $tokenIndex[$token], 'cum' => $sum];
            }
            $table[$key] = ['total' => $sum, 'cum' => $cum];
        }

        // Attached so the generator can map ids back to strings.
        $table[self::TOKENS_KEY] = $tokenList;

        return $table;
    }

    /**
     * Walks the chain described by $table and returns the concatenated states.
     *
     * The result is the random start state followed by ceil($length / $look_forward) further
     * tokens, so it is roughly, not exactly, $length bytes long. Returns '' when the table
     * contains no states.
     */
    private function generate_markov_text(int $length, array $table, int $look_forward = 4): string
    {
        // The token list lives in the same array as the states but must never be used as one.
        $stateCount = count($table) - (array_key_exists(self::TOKENS_KEY, $table) ? 1 : 0);
        if ($stateCount < 1) {
            return '';
        }

        $tokens = $table[self::TOKENS_KEY] ?? [];
        $state = $this->randomState($table);
        $output = $state;

        $iterations = (int) ceil($length / max(1, $look_forward));
        for ($i = 0; $i < $iterations; $i++) {
            $entry = $table[$state] ?? null;
            $tokenId = is_array($entry) ? $this->pick_weighted($entry) : null;

            if ($tokenId !== null && isset($tokens[$tokenId])) {
                $state = (string) $tokens[$tokenId];
            } else {
                // Dead end (e.g. a short token from the end of the source): restart somewhere random.
                $state = $this->randomState($table);
            }
            $output .= $state;
        }

        return $output;
    }

    /**
     * Picks a random state key from $table, skipping the reserved token-list entry.
     *
     * Precondition: $table contains at least one key other than self::TOKENS_KEY
     * (the caller checks this); otherwise the retry loop could not terminate.
     */
    private function randomState(array $table): string
    {
        do {
            // array_rand() returns the key itself; integer-like keys come back as ints.
            $state = (string) array_rand($table);
        } while ($state === self::TOKENS_KEY);

        return $state;
    }

    /**
     * Draws a token id from a table entry, weighted by its cumulative counts.
     *
     * @param array $entry expected to carry 'total' and 'cum' as built by generate_markov_table()
     *
     * @return int|null the chosen token id, or null when the entry has no usable weights
     */
    private function pick_weighted(array $entry): ?int
    {
        if (empty($entry['cum']) || empty($entry['total'])) {
            return null;
        }

        $roll = mt_rand(1, $entry['total']);
        foreach ($entry['cum'] as $pair) {
            if ($roll <= $pair['cum']) {
                return $pair['id'];
            }
        }

        return null;
    }

    /**
     * Generates roughly $length bytes of prose from lib/php_text.txt.
     *
     * @throws \RuntimeException when the data file is missing or unreadable
     */
    private function generate(int $length): string
    {
        return $this->generateFromFile('php_text.txt', $length);
    }

    /**
     * Generates roughly $length bytes of code-like text from lib/php_code.txt.
     *
     * @throws \RuntimeException when the data file is missing or unreadable
     */
    private function generate_code(int $length): string
    {
        return $this->generateFromFile('php_code.txt', $length);
    }

    /**
     * Shared implementation of generate() and generate_code(): resolves the data file under
     * lib/, loads (or builds) its table and walks the chain.
     *
     * @throws \RuntimeException when the data file is missing or unreadable
     */
    private function generateFromFile(string $fileName, int $length, int $look_forward = 4): string
    {
        $path = dirname(__DIR__, 3) . '/lib/' . $fileName;
        if (!is_readable($path)) {
            throw new \RuntimeException("Missing data file: {$path}");
        }

        $table = $this->getTableForFile($path, $look_forward);

        return $this->generate_markov_text($length, $table, $look_forward);
    }

    /**
     * Returns the transition table for $path, trying the per-instance cache first, then the
     * serialized cache file, and finally rebuilding from the source text.
     *
     * @throws \RuntimeException when the source file cannot be read
     */
    private function getTableForFile(string $path, int $look_forward = 4): array
    {
        $cacheKey = md5($path . '|' . $look_forward);
        if (isset($this->tableCache[$cacheKey])) {
            return $this->tableCache[$cacheKey];
        }

        $cacheFile = $this->cacheDir . '/markov_' . $cacheKey . '.ser';
        $srcMTime = filemtime($path) ?: 0;

        $table = $this->readCachedTable($cacheFile, $srcMTime);
        if ($table === null) {
            $text = file_get_contents($path);
            if ($text === false) {
                throw new \RuntimeException("Unable to read data file: {$path}");
            }
            $table = $this->generate_markov_table($text, $look_forward);

            // Write cache best-effort: a failed write only costs a rebuild next time.
            try {
                @file_put_contents($cacheFile, serialize(['mtime' => $srcMTime, 'table' => $table]), LOCK_EX);
            } catch (\Throwable $e) {
                // Deliberately ignored (covers error handlers that turn warnings into exceptions).
            }
        }

        $this->tableCache[$cacheKey] = $table;

        return $table;
    }

    /**
     * Loads a table from $cacheFile if it exists, decodes cleanly and was written for a source
     * file with exactly the modification time $srcMTime; returns null otherwise.
     */
    private function readCachedTable(string $cacheFile, int $srcMTime): ?array
    {
        if (!is_readable($cacheFile)) {
            return null;
        }

        // The file may vanish between the check and the read; never hand false to unserialize().
        $raw = @file_get_contents($cacheFile);
        if (!is_string($raw)) {
            return null;
        }

        // NOTE(review): unserialize() on a file is only as safe as the cache directory's
        // permissions. The payload is plain arrays, so object instantiation is disabled.
        $data = @unserialize($raw, ['allowed_classes' => false]);
        if (
            !is_array($data)
            || !isset($data['mtime'], $data['table'])
            || $data['mtime'] !== $srcMTime
            || !is_array($data['table'])
        ) {
            return null;
        }

        return $data['table'];
    }

    /** Turns a title into its stub: spaces become dashes and the result is lower-cased. */
    private function stubify(string $title): string
    {
        return strtolower(str_replace(' ', '-', $title));
    }

    /**
     * Generates a random title: generated prose with punctuation stripped, cut at the first
     * space found at or after a random byte offset between 4 and 30, then word-capitalised.
     */
    private function get_title(): string
    {
        $length = random_int(4, 30);
        $title = $this->generate($length * 10);
        $title = str_replace(['.', ',', '!', '?', '"', '—'], '', $title);

        // Keep the title on one line with single spaces so the stub has no leading,
        // doubled or multi-line separators.
        $title = trim(preg_replace('/[ \t\r\n]+/', ' ', $title) ?? $title);

        // strpos() rejects offsets past the end of the string and returns false when no space
        // follows the offset; in both cases keep the whole title instead of passing false on.
        $cut = $length <= strlen($title) ? strpos($title, ' ', $length) : false;
        if ($cut !== false) {
            $title = substr($title, 0, $cut);
        }

        return ucwords($title);
    }

    /**
     * Generates a headline of roughly 100 bytes followed by '...'.
     *
     * @param string $stub currently unused; the headline is random and not derived from it
     */
    private function get_headline(string $stub): string
    {
        return $this->generate(100) . '...';
    }

    /**
     * Generates the article body: 5 to 16 HTML blocks separated by blank lines. The first is
     * a bold lead paragraph, every third block (indexes 2, 5, 8, ...) is a code sample, the
     * rest are plain paragraphs. All generated text is HTML-escaped.
     *
     * @param string $stub currently unused; the content is random and not derived from it
     */
    private function getContent(string $stub): string
    {
        $paragraphCount = random_int(5, 16);
        $paragraphs = [];
        for ($i = 0; $i < $paragraphCount; $i++) {
            if ($i === 0) {
                $paragraphs[] = '<p><strong>' . $this->escape($this->generate(100)) . '</strong></p>';
            } elseif ($i % 3 === 2) {
                $paragraphs[] = '<div class="skill-item"><pre><code>'
                    . $this->escape($this->generate_code(500))
                    . '</code></pre></div>';
            } else {
                $paragraphs[] = '<p>' . $this->escape($this->generate(500)) . '</p>';
            }
        }

        return implode("\n\n", $paragraphs);
    }

    /** HTML-escapes generated text with the class-wide flags. */
    private function escape(string $text): string
    {
        return htmlspecialchars($text, self::ESCAPE_FLAGS);
    }

    /**
     * Returns $count randomly generated article summaries.
     *
     * @return array list of ['title' => string, 'stub' => string, 'headline' => string]
     *
     * @throws \RuntimeException when a data file is missing or unreadable
     */
    public function getRandomArticles(int $count): array
    {
        $articles = [];
        for ($i = 0; $i < $count; $i++) {
            $title = $this->get_title();
            $stub = $this->stubify($title);
            $articles[] = [
                'title' => $title,
                'stub' => $stub,
                'headline' => $this->get_headline($stub),
            ];
        }

        return $articles;
    }

    /**
     * Returns a full article for $stub. The title is reconstructed from the stub itself
     * (dashes become spaces, words capitalised); headline and content are freshly generated
     * on every call. The title is returned unescaped.
     *
     * The nullable return type is kept for interface compatibility; this implementation
     * always returns an array.
     *
     * @return array|null ['title' => string, 'stub' => string, 'headline' => string, 'content' => string]
     *
     * @throws \RuntimeException when a data file is missing or unreadable
     */
    public function getByStub(string $stub): ?array
    {
        return [
            'title' => ucwords(str_replace('-', ' ', $stub)),
            'stub' => $stub,
            'headline' => ucfirst($this->get_headline($stub)),
            'content' => $this->getContent($stub),
        ];
    }
}