Files
molenda.net/src/Application/Services/BlogProvider.php
T
2026-05-13 22:43:29 +02:00

221 lines
7.0 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Application\Services;
/**
 * Produces placeholder blog articles (titles, headlines and HTML bodies) by
 * running a chunk-level Markov chain over the text files under /lib.
 *
 * Tables are built once per source file, memoised in memory for the lifetime
 * of the instance and persisted best-effort as serialized files in the cache
 * directory, keyed by source path and chunk size and validated by mtime.
 *
 * The chain works on raw bytes (strlen/substr), so generated text may begin or
 * end in the middle of a multi-byte character.
 */
class BlogProvider
{
    /** Reserved table key that stores the shared id => token list. */
    private const TOKENS_KEY = '_tokens';

    /** @var array<string, array> In-memory tables keyed by md5("path|look_forward"). */
    private array $tableCache = [];

    /** Directory that holds the serialized table cache files. */
    private string $cacheDir;

    /**
     * Resolves the cache directory and creates it when missing.
     *
     * Creation is best-effort: a failure only means tables are rebuilt
     * instead of being read from disk.
     */
    public function __construct()
    {
        $this->cacheDir = dirname(__DIR__, 3) . '/var/cache/markov';
        if (!is_dir($this->cacheDir)) {
            @mkdir($this->cacheDir, 0775, true);
        }
    }

    /**
     * Builds the transition table for $text.
     *
     * Every $look_forward-byte chunk is mapped to the chunks that follow it,
     * stored as cumulative weights so a follower can be drawn with a single
     * random number. Follower strings are replaced by integer ids into one
     * shared token list (stored under TOKENS_KEY) to keep the table small.
     *
     * @param string $text         Source text, processed byte-wise.
     * @param int    $look_forward Chunk size in bytes; values below 1 are treated as 1.
     *
     * @return array Map of chunk => ['total' => int, 'cum' => list<['id' => int, 'cum' => int]>]
     *               plus the TOKENS_KEY entry holding the id => token list.
     */
    private function generate_markov_table(string $text, int $look_forward = 4): array
    {
        $look_forward = max(1, $look_forward);

        // Single pass: count how often each chunk is followed by each next chunk.
        $counts = [];
        $max = strlen($text) - $look_forward;
        for ($i = 0; $i < $max; $i++) {
            $key = substr($text, $i, $look_forward);
            $next = substr($text, $i + $look_forward, $look_forward);
            $counts[$key][$next] = ($counts[$key][$next] ?? 0) + 1;
        }

        // Convert the counts into cumulative weights and intern follower strings.
        $table = [];
        $tokenIndex = [];
        $tokenList = [];
        foreach ($counts as $key => $followers) {
            $cum = [];
            $sum = 0;
            foreach ($followers as $item => $weight) {
                if (!isset($tokenIndex[$item])) {
                    $tokenIndex[$item] = count($tokenList);
                    // Numeric-looking chunks come back from array keys as ints.
                    $tokenList[] = (string) $item;
                }
                $sum += $weight;
                $cum[] = ['id' => $tokenIndex[$item], 'cum' => $sum];
            }
            $table[$key] = ['total' => $sum, 'cum' => $cum];
        }

        $table[self::TOKENS_KEY] = $tokenList;

        return $table;
    }

    /**
     * Walks the chain to produce roughly $length bytes of text.
     *
     * The result is the random start chunk plus one chunk per iteration, so
     * it is usually a few bytes longer than $length. When the current chunk
     * has no recorded follower the walk restarts from another random state.
     *
     * @param int   $length       Approximate number of bytes to generate.
     * @param array $table        Table produced by generate_markov_table().
     * @param int   $look_forward Chunk size the table was built with.
     *
     * @return string Generated text, or '' when the table has no states.
     */
    private function generate_markov_text(int $length, array $table, int $look_forward = 4): string
    {
        $tokenList = $table[self::TOKENS_KEY] ?? [];

        // The token list shares the array with the states but is not a state
        // itself; leaving it in would emit the literal key as text.
        $states = array_keys($table);
        $tokensPos = array_search(self::TOKENS_KEY, $states, true);
        if ($tokensPos !== false) {
            unset($states[$tokensPos]);
        }
        if (!$states) {
            return '';
        }

        $char = (string) $states[array_rand($states)];
        $o = $char;
        $iterations = (int) ceil($length / max(1, $look_forward));
        for ($i = 0; $i < $iterations; $i++) {
            $entry = $table[$char] ?? null;
            $newId = is_array($entry) ? $this->pick_weighted($entry) : false;
            if ($newId !== false && isset($tokenList[$newId])) {
                $char = (string) $tokenList[$newId];
            } else {
                // Dead end: continue from another random state.
                $char = (string) $states[array_rand($states)];
            }
            $o .= $char;
        }

        return $o;
    }

    /**
     * Draws one follower id from a table entry according to its weights.
     *
     * @param array $entry Table entry with 'total' and 'cum' keys.
     *
     * @return int|false Token id, or false when the entry has no followers.
     */
    private function pick_weighted(array $entry)
    {
        if (empty($entry['cum']) || empty($entry['total'])) {
            return false;
        }

        $rand = mt_rand(1, $entry['total']);
        foreach ($entry['cum'] as $pair) {
            if ($rand <= $pair['cum']) {
                return $pair['id'];
            }
        }

        return false;
    }

    /**
     * Generates prose-like text from lib/php_text.txt.
     *
     * @param int $length Approximate number of bytes to generate.
     *
     * @throws \RuntimeException When the data file is missing or unreadable.
     */
    private function generate(int $length): string
    {
        return $this->generateFromFile('/lib/php_text.txt', $length);
    }

    /**
     * Generates code-like text from lib/php_code.txt.
     *
     * @param int $length Approximate number of bytes to generate.
     *
     * @throws \RuntimeException When the data file is missing or unreadable.
     */
    private function generate_code(int $length): string
    {
        return $this->generateFromFile('/lib/php_code.txt', $length);
    }

    /**
     * Shared implementation of generate() and generate_code().
     *
     * @param string $relativePath Data file path relative to the project root (leading slash).
     * @param int    $length       Approximate number of bytes to generate.
     * @param int    $look_forward Chunk size used for both table and walk.
     *
     * @throws \RuntimeException When the data file is missing or unreadable.
     */
    private function generateFromFile(string $relativePath, int $length, int $look_forward = 4): string
    {
        $path = dirname(__DIR__, 3) . $relativePath;
        if (!is_readable($path)) {
            throw new \RuntimeException("Missing data file: {$path}");
        }

        $table = $this->getTableForFile($path, $look_forward);

        return $this->generate_markov_text($length, $table, $look_forward);
    }

    /**
     * Returns the Markov table for a source file, using the in-memory cache,
     * then the on-disk cache (valid while the stored mtime matches the source
     * file), and finally rebuilding it from the file contents.
     *
     * @param string $path         Absolute path of the source text file.
     * @param int    $look_forward Chunk size in bytes.
     *
     * @return array Table in the format of generate_markov_table().
     *
     * @throws \RuntimeException When the source file cannot be read.
     */
    private function getTableForFile(string $path, int $look_forward = 4): array
    {
        $cacheKey = md5($path . '|' . $look_forward);
        if (isset($this->tableCache[$cacheKey])) {
            return $this->tableCache[$cacheKey];
        }

        $cacheFile = $this->cacheDir . '/markov_' . $cacheKey . '.ser';
        $srcMTime = filemtime($path) ?: 0;

        if (is_readable($cacheFile)) {
            $raw = @file_get_contents($cacheFile);
            // The file may vanish or be half-written between the check and the
            // read; under strict_types a false must never reach unserialize().
            // The cache only ever contains arrays and scalars, so object
            // instantiation is disabled.
            $data = $raw === false ? false : @unserialize($raw, ['allowed_classes' => false]);
            if (
                is_array($data)
                && isset($data['mtime'], $data['table'])
                && $data['mtime'] === $srcMTime
                && is_array($data['table'])
            ) {
                $this->tableCache[$cacheKey] = $data['table'];

                return $data['table'];
            }
        }

        $text = file_get_contents($path);
        if ($text === false) {
            throw new \RuntimeException("Unable to read data file: {$path}");
        }

        $table = $this->generate_markov_table($text, $look_forward);

        // Writing the cache is best-effort: a failure only costs a rebuild later.
        try {
            @file_put_contents($cacheFile, serialize(['mtime' => $srcMTime, 'table' => $table]), LOCK_EX);
        } catch (\Throwable $e) {
            // Deliberately ignored, see above.
        }

        $this->tableCache[$cacheKey] = $table;

        return $table;
    }

    /**
     * Turns a title into its URL stub: lower-cased, spaces replaced by dashes.
     */
    private function stubify(string $title): string
    {
        return strtolower(str_replace(' ', '-', $title));
    }

    /**
     * Generates a random title of at least 4 to 30 bytes, cut at the first
     * space at or after that length, with punctuation removed and every word
     * capitalised.
     *
     * @throws \RuntimeException When the text data file is missing or unreadable.
     */
    private function get_title(): string
    {
        $length = random_int(4, 30);
        $title = $this->generate($length * 10);
        $title = str_replace(['.', ',', '!', '?', '"', '—'], '', $title);
        // stubify() only replaces single spaces, so collapse line breaks, tabs
        // and runs of spaces before the title is cut and turned into a stub.
        $title = trim((string) preg_replace('/\s+/', ' ', $title));

        // strpos() rejects an offset beyond the string and returns false when
        // no space follows it; either case keeps the title as generated
        // instead of passing false to substr().
        $cut = strlen($title) >= $length ? strpos($title, ' ', $length) : false;
        if ($cut !== false) {
            $title = substr($title, 0, $cut);
        }

        return ucwords($title);
    }

    /**
     * Generates a headline of about 100 bytes followed by an ellipsis.
     *
     * @param string $stub Article stub; currently not used by the generator.
     *
     * @throws \RuntimeException When the text data file is missing or unreadable.
     */
    private function get_headline(string $stub): string
    {
        return $this->generate(100) . '...';
    }

    /**
     * Generates the HTML body: 5 to 16 blocks separated by blank lines. The
     * first is a bold lead paragraph, every third block (index 2, 5, 8, ...)
     * is a code sample, the rest are plain paragraphs.
     *
     * @param string $stub Article stub; currently not used by the generator.
     *
     * @throws \RuntimeException When a data file is missing or unreadable.
     */
    private function getContent(string $stub): string
    {
        $paragraphCount = random_int(5, 16);
        $paragraphs = [];
        for ($i = 0; $i < $paragraphCount; $i++) {
            if ($i === 0) {
                $paragraphs[] = '<p><strong>' . $this->escape($this->generate(100)) . '</strong></p>';
            } elseif ($i % 3 === 2) {
                $paragraphs[] = '<div class="skill-item"><pre><code>'
                    . $this->escape($this->generate_code(500))
                    . '</code></pre></div>';
            } else {
                $paragraphs[] = '<p>' . $this->escape($this->generate(500)) . '</p>';
            }
        }

        return implode("\n\n", $paragraphs);
    }

    /**
     * Escapes generated text for HTML element content.
     *
     * ENT_SUBSTITUTE matters here: the byte-wise chain can cut a multi-byte
     * character in half, and without the flag htmlspecialchars() returns an
     * empty string for input containing an invalid sequence.
     */
    private function escape(string $text): string
    {
        return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Generates $count random article teasers.
     *
     * @param int $count Number of articles; values below 1 yield an empty list.
     *
     * @return array List of ['title' => string, 'stub' => string, 'headline' => string].
     *
     * @throws \RuntimeException When the text data file is missing or unreadable.
     */
    public function getRandomArticles(int $count): array
    {
        $articles = [];
        for ($i = 0; $i < $count; $i++) {
            $title = $this->get_title();
            $stub = $this->stubify($title);
            $articles[] = [
                'title' => $title,
                'stub' => $stub,
                'headline' => $this->get_headline($stub),
            ];
        }

        return $articles;
    }

    /**
     * Builds a full article for a stub. The title is derived from the stub;
     * headline and content are freshly generated on every call.
     *
     * @param string $stub URL stub of the article.
     *
     * @return array|null ['title', 'stub', 'headline', 'content']; this
     *                    implementation never returns null.
     *
     * @throws \RuntimeException When a data file is missing or unreadable.
     */
    public function getByStub(string $stub): ?array
    {
        return [
            'title' => ucwords(str_replace('-', ' ', $stub)),
            'stub' => $stub,
            'headline' => ucfirst($this->get_headline($stub)),
            'content' => $this->getContent($stub),
        ];
    }
}