gruby refaktor otyły panie
This commit is contained in:
@@ -0,0 +1,220 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Application\Services;
|
||||
|
||||
/**
 * Produces placeholder blog articles whose text is generated by a byte-level
 * Markov chain trained on text files under the project's lib/ directory.
 *
 * Built transition tables are memoised per instance and also persisted,
 * best-effort, as serialized files in var/cache/markov.
 */
class BlogProvider
{
    /** Reserved table key holding the shared "token id => token string" list. */
    private const TOKENS_KEY = '_tokens';

    /** @var array<string, array> Built tables keyed by md5("path|look_forward"). */
    private array $tableCache = [];

    /** Directory where serialized tables are stored. */
    private string $cacheDir;

    /**
     * Resolves the cache directory and tries to create it.
     *
     * Creation failures are deliberately ignored: the on-disk cache is only an
     * optimisation and every read/write of it is best-effort.
     */
    public function __construct()
    {
        $this->cacheDir = dirname(__DIR__, 3) . '/var/cache/markov';
        if (!is_dir($this->cacheDir)) {
            @mkdir($this->cacheDir, 0775, true);
        }
    }

    /**
     * Builds a transition table from $text.
     *
     * Every $look_forward-byte window is a state; the (up to) $look_forward
     * bytes that follow it are counted as its successor token.
     *
     * @param string $text         Training text (processed byte-wise).
     * @param int    $look_forward Window size in bytes; values below 1 are treated as 1.
     *
     * @return array State => ['total' => int, 'cum' => list<['id' => int, 'cum' => int]>],
     *               plus the reserved '_tokens' entry mapping ids to token strings.
     */
    private function generate_markov_table(string $text, int $look_forward = 4): array
    {
        $look_forward = max(1, $look_forward);
        $limit = strlen($text) - $look_forward;

        // Pass 1: count how often each successor token follows each state.
        $counts = [];
        for ($i = 0; $i < $limit; $i++) {
            $state = substr($text, $i, $look_forward);
            $next = substr($text, $i + $look_forward, $look_forward);
            $counts[$state][$next] = ($counts[$state][$next] ?? 0) + 1;
        }

        // Pass 2: turn counts into cumulative weights. Successor strings are
        // replaced by integer ids into one shared list to save memory.
        $table = [];
        $tokenIndex = [];
        $tokenList = [];
        foreach ($counts as $state => $followers) {
            $cumulative = [];
            $sum = 0;
            foreach ($followers as $token => $weight) {
                if (!isset($tokenIndex[$token])) {
                    $tokenIndex[$token] = count($tokenList);
                    // PHP turns numeric-looking keys ("1234") into ints; store text as text.
                    $tokenList[] = (string) $token;
                }
                $sum += $weight;
                $cumulative[] = ['id' => $tokenIndex[$token], 'cum' => $sum];
            }
            $table[$state] = ['total' => $sum, 'cum' => $cumulative];
        }

        $table[self::TOKENS_KEY] = $tokenList;

        return $table;
    }

    /**
     * Walks the chain described by $table and returns the concatenated tokens.
     *
     * The result is the start state followed by ceil($length / $look_forward)
     * tokens, so it is approximately (not exactly) $length bytes long.
     *
     * @param int   $length       Approximate number of bytes to generate.
     * @param array $table        Table produced by generate_markov_table().
     * @param int   $look_forward Window size the table was built with.
     *
     * @return string Generated text, or '' when the table has no states.
     */
    private function generate_markov_text(int $length, array $table, int $look_forward = 4): string
    {
        $tokenList = $table[self::TOKENS_KEY] ?? [];

        // The token list shares the array with the states but is not a state:
        // it must never be chosen as a start or fallback state.
        $states = array_keys($table);
        $reserved = array_search(self::TOKENS_KEY, $states, true);
        if ($reserved !== false) {
            unset($states[$reserved]);
        }
        if ($states === []) {
            return '';
        }

        $state = (string) $states[array_rand($states)];
        $output = $state;

        $iterations = (int) ceil($length / max(1, $look_forward));
        for ($i = 0; $i < $iterations; $i++) {
            $entry = $table[$state] ?? null;
            $tokenId = is_array($entry) ? $this->pick_weighted($entry) : false;

            if ($tokenId !== false && isset($tokenList[$tokenId])) {
                $state = (string) $tokenList[$tokenId];
            } else {
                // Dead end (e.g. the short token at the end of the text): restart anywhere.
                $state = (string) $states[array_rand($states)];
            }
            $output .= $state;
        }

        return $output;
    }

    /**
     * Picks a successor id from a table entry, weighted by occurrence count.
     *
     * @param array $entry Entry with 'total' (sum of weights) and 'cum' (cumulative pairs).
     *
     * @return int|false Token id, or false when the entry has no usable weights.
     */
    private function pick_weighted(array $entry)
    {
        if (empty($entry['cum']) || empty($entry['total'])) {
            return false;
        }

        $roll = mt_rand(1, $entry['total']);
        foreach ($entry['cum'] as $pair) {
            if ($roll <= $pair['cum']) {
                return $pair['id'];
            }
        }

        return false;
    }

    /**
     * Generates prose-like text from lib/php_text.txt.
     *
     * @throws \RuntimeException When the data file is missing or unreadable.
     */
    private function generate(int $length): string
    {
        return $this->generateFromFile('php_text.txt', $length);
    }

    /**
     * Generates code-like text from lib/php_code.txt.
     *
     * @throws \RuntimeException When the data file is missing or unreadable.
     */
    private function generate_code(int $length): string
    {
        return $this->generateFromFile('php_code.txt', $length);
    }

    /**
     * Shared implementation of generate() and generate_code().
     *
     * @param string $fileName     File name inside the project's lib/ directory.
     * @param int    $length       Approximate number of bytes to generate.
     * @param int    $look_forward Window size used for both table and walk.
     *
     * @throws \RuntimeException When the data file is missing or unreadable.
     */
    private function generateFromFile(string $fileName, int $length, int $look_forward = 4): string
    {
        $path = dirname(__DIR__, 3) . '/lib/' . $fileName;
        if (!is_readable($path)) {
            throw new \RuntimeException("Missing data file: {$path}");
        }

        $table = $this->getTableForFile($path, $look_forward);

        return $this->generate_markov_text($length, $table, $look_forward);
    }

    /**
     * Returns the transition table for $path, using the in-memory cache, then
     * the on-disk cache (valid only while the source mtime matches), and
     * finally rebuilding it from the file.
     *
     * @throws \RuntimeException When the source file cannot be read.
     */
    private function getTableForFile(string $path, int $look_forward = 4): array
    {
        $cacheKey = md5($path . '|' . $look_forward);
        if (isset($this->tableCache[$cacheKey])) {
            return $this->tableCache[$cacheKey];
        }

        $cacheFile = $this->cacheDir . '/markov_' . $cacheKey . '.ser';
        $srcMTime = filemtime($path) ?: 0;

        $table = $this->readCachedTable($cacheFile, $srcMTime);
        if ($table === null) {
            $text = file_get_contents($path);
            if ($text === false) {
                throw new \RuntimeException("Unable to read data file: {$path}");
            }
            $table = $this->generate_markov_table($text, $look_forward);

            // Best-effort write: a failure only means the table is rebuilt next time.
            @file_put_contents(
                $cacheFile,
                serialize(['mtime' => $srcMTime, 'table' => $table]),
                LOCK_EX
            );
        }

        $this->tableCache[$cacheKey] = $table;

        return $table;
    }

    /**
     * Loads a table from a cache file if it exists, parses, and was written for
     * the given source mtime.
     *
     * @return array|null The cached table, or null when the cache is unusable.
     */
    private function readCachedTable(string $cacheFile, int $srcMTime): ?array
    {
        if (!is_readable($cacheFile)) {
            return null;
        }

        // The file can vanish between the check and the read; under strict_types
        // passing false on to unserialize() would be a TypeError.
        $raw = @file_get_contents($cacheFile);
        if ($raw === false) {
            return null;
        }

        // The payload is plain arrays/scalars, so never instantiate objects from it.
        $data = @unserialize($raw, ['allowed_classes' => false]);
        if (
            !is_array($data)
            || !isset($data['mtime'], $data['table'])
            || $data['mtime'] !== $srcMTime
            || !is_array($data['table'])
        ) {
            return null;
        }

        return $data['table'];
    }

    /**
     * Converts a title into a URL stub: spaces become dashes, ASCII letters are lowercased.
     */
    private function stubify(string $title): string
    {
        return strtolower(str_replace(' ', '-', $title));
    }

    /**
     * Generates a random title: punctuation-free generated text cut at the first
     * space at or after a random byte offset (4-30), with words capitalised.
     *
     * @throws \RuntimeException When the text data file is missing or unreadable.
     */
    private function get_title(): string
    {
        $length = random_int(4, 30);
        $title = $this->generate($length * 10);
        $title = str_replace(['.', ',', '!', '?', '"', '—'], '', $title);

        // strpos() rejects offsets past the end of the string and returns false
        // when no space follows; in both cases keep the text uncut.
        $cut = strlen($title) > $length ? strpos($title, ' ', $length) : false;
        if ($cut !== false) {
            $title = substr($title, 0, $cut);
        }

        return ucwords($title);
    }

    /**
     * Generates a teaser line. $stub is accepted for call-site symmetry but not used.
     *
     * @throws \RuntimeException When the text data file is missing or unreadable.
     */
    private function get_headline(string $stub): string
    {
        return $this->generate(100) . '...';
    }

    /**
     * Generates the article body as HTML: a bold lead paragraph, then text
     * paragraphs with every third block rendered as a code sample.
     * $stub is accepted for call-site symmetry but not used.
     *
     * @throws \RuntimeException When a data file is missing or unreadable.
     */
    private function getContent(string $stub): string
    {
        $paragraphCount = random_int(5, 16);
        $paragraphs = [];

        for ($i = 0; $i < $paragraphCount; $i++) {
            if ($i === 0) {
                $paragraphs[] = '<p><strong>' . $this->escape($this->generate(100)) . '</strong></p>';
            } elseif ($i % 3 === 2) {
                $paragraphs[] = '<div class="skill-item"><pre><code>'
                    . $this->escape($this->generate_code(500))
                    . '</code></pre></div>';
            } else {
                $paragraphs[] = '<p>' . $this->escape($this->generate(500)) . '</p>';
            }
        }

        return implode("\n\n", $paragraphs);
    }

    /**
     * HTML-escapes generated text.
     *
     * The chain works on bytes, so a random start or restart can begin in the
     * middle of a multi-byte character. Without ENT_SUBSTITUTE htmlspecialchars()
     * returns an empty string for such input and the paragraph would vanish.
     */
    private function escape(string $text): string
    {
        return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Generates $count article summaries.
     *
     * @return array List of ['title' => string, 'stub' => string, 'headline' => string].
     *
     * @throws \RuntimeException When the text data file is missing or unreadable.
     */
    public function getRandomArticles(int $count): array
    {
        $articles = [];
        for ($i = 0; $i < $count; $i++) {
            $title = $this->get_title();
            $stub = $this->stubify($title);
            $articles[] = [
                'title' => $title,
                'stub' => $stub,
                'headline' => $this->get_headline($stub),
            ];
        }

        return $articles;
    }

    /**
     * Builds a full article for $stub. The title is derived from the stub;
     * headline and content are freshly generated on every call.
     *
     * @return array|null ['title', 'stub', 'headline', 'content']; this implementation never returns null.
     *
     * @throws \RuntimeException When a data file is missing or unreadable.
     */
    public function getByStub(string $stub): ?array
    {
        return [
            'title' => ucwords(str_replace('-', ' ', $stub)),
            'stub' => $stub,
            'headline' => ucfirst($this->get_headline($stub)),
            'content' => $this->getContent($stub),
        ];
    }
}
|
||||
Reference in New Issue
Block a user