cacheDir = dirname(__DIR__, 3) . '/var/cache/markov';
        if (!is_dir($this->cacheDir)) {
            @mkdir($this->cacheDir, 0775, true);
        }
    }

    /**
     * Build a Markov transition table from a text.
     *
     * Every $look_forward-byte window of $text is a state; the token that
     * follows it is the next (up to) $look_forward bytes. Successor counts are
     * converted to cumulative weights so a successor can be drawn with a single
     * random number.
     *
     * Returned shape:
     *   [
     *     <state>   => ['total' => int, 'cum' => [['id' => int, 'cum' => int], ...]],
     *     ...
     *     '_tokens' => [<id> => <token string>, ...],   // shared id => token map
     *   ]
     *
     * @param string $text         Source text (processed byte-wise).
     * @param int    $look_forward State/token width in bytes; values below 1 are treated as 1.
     *
     * @return array
     */
    private function generate_markov_table(string $text, int $look_forward = 4): array
    {
        // Same clamp generate_markov_text applies; a width of 0 would only yield empty states.
        $look_forward = max(1, $look_forward);

        // Single pass: count how often each token follows each state.
        $table = [];
        $max = strlen($text) - $look_forward;
        for ($i = 0; $i < $max; $i++) {
            $key = substr($text, $i, $look_forward);
            $next = substr($text, $i + $look_forward, $look_forward);
            $table[$key][$next] = ($table[$key][$next] ?? 0) + 1;
        }

        // Convert counts into cumulative weights. To save memory, successor
        // tokens are mapped to integer ids backed by one shared token list.
        $tokenIndex = [];
        $tokenList = [];
        foreach ($table as $key => $counts) {
            $cum = [];
            $sum = 0;
            foreach ($counts as $item => $weight) {
                if (!isset($tokenIndex[$item])) {
                    $tokenIndex[$item] = count($tokenList);
                    // PHP turns numeric-string array keys into ints; store the token as a string again.
                    $tokenList[] = (string) $item;
                }
                $sum += $weight;
                $cum[] = ['id' => $tokenIndex[$item], 'cum' => $sum];
            }
            $table[$key] = ['total' => $sum, 'cum' => $cum];
        }

        // Attach the token list so the picker can map ids back to strings.
        $table['_tokens'] = $tokenList;

        return $table;
    }

    /**
     * Generate text by walking a table produced by generate_markov_table().
     *
     * Starts from a random state and appends one token per iteration, for
     * ceil($length / $look_forward) iterations. The result is not truncated to
     * $length. When the current state has no usable successors, a random state
     * is appended instead.
     *
     * @param int   $length       Approximate number of bytes to generate.
     * @param array $table        Table from generate_markov_table().
     * @param int   $look_forward Token width used to derive the iteration count.
     *
     * @return string Generated text, or '' when the table has no states.
     */
    private function generate_markov_text(int $length, array $table, int $look_forward = 4): string
    {
        $tokenList = $table['_tokens'] ?? [];

        // '_tokens' is metadata, not a state: drop it from the candidates so it
        // can never be chosen as a state and leak into the generated text.
        $states = array_keys($table);
        $metaPos = array_search('_tokens', $states, true);
        if ($metaPos !== false) {
            unset($states[$metaPos]);
        }
        if (!$states) {
            return '';
        }

        $state = (string) $states[array_rand($states)];
        $output = $state;

        $iterations = (int) ceil($length / max(1, $look_forward));
        for ($i = 0; $i < $iterations; $i++) {
            $entry = $table[$state] ?? null;
            if (is_array($entry) && !empty($entry['cum'])) {
                $id = $this->pick_weighted($entry);
                if ($id !== false && isset($tokenList[$id])) {
                    $state = (string) $tokenList[$id];
                    $output .= $state;
                    continue;
                }
            }

            // Fallback: jump to another random state.
            $state = (string) $states[array_rand($states)];
            $output .= $state;
        }

        return $output;
    }

    /**
     * Draw a successor token id from a table entry, weighted by its counts.
     *
     * @param array $entry Entry with 'total' and 'cum' keys as built by generate_markov_table().
     *
     * @return int|false Token id, or false when the entry is empty or no bucket matches.
     */
    private function pick_weighted(array $entry)
    {
        if (empty($entry['cum']) || empty($entry['total'])) {
            return false;
        }

        $rand = mt_rand(1, $entry['total']);
        foreach ($entry['cum'] as $pair) {
            // The first cumulative bucket that reaches $rand wins.
            if ($rand <= $pair['cum']) {
                return $pair['id'];
            }
        }

        return false;
    }

    /**
     * Generate prose-like text from lib/php_text.txt.
     *
     * @param int $length Approximate number of bytes to generate.
     *
     * @return string
     *
     * @throws \RuntimeException When the data file is not readable.
     */
    private function generate(int $length): string
    {
        $path = dirname(__DIR__, 3) . '/lib/php_text.txt';
        if (!is_readable($path)) {
            throw new \RuntimeException("Missing data file: {$path}");
        }

        $table = $this->getTableForFile($path);

        return $this->generate_markov_text($length, $table);
    }

    /**
     * Generate code-like text from lib/php_code.txt.
     *
     * @param int $length Approximate number of bytes to generate.
     *
     * @return string
     *
     * @throws \RuntimeException When the data file is not readable.
     */
    private function generate_code(int $length): string
    {
        $path = dirname(__DIR__, 3) . '/lib/php_code.txt';
        if (!is_readable($path)) {
            throw new \RuntimeException("Missing data file: {$path}");
        }

        $table = $this->getTableForFile($path);

        return $this->generate_markov_text($length, $table, 4);
    }

    /**
     * Return the Markov table for a source file, using two cache layers.
     *
     * Lookup order: the in-memory $this->tableCache, then a serialized file in
     * $this->cacheDir (accepted only when its stored mtime equals the source
     * file's mtime), then a fresh build that is written back to both caches.
     *
     * @param string $path         Source text file.
     * @param int    $look_forward State width passed to generate_markov_table().
     *
     * @return array
     *
     * @throws \RuntimeException When the source file cannot be read.
     */
    private function getTableForFile(string $path, int $look_forward = 4): array
    {
        $cacheKey = md5($path . '|' . $look_forward);
        if (isset($this->tableCache[$cacheKey])) {
            return $this->tableCache[$cacheKey];
        }

        $cacheFile = $this->cacheDir . '/markov_' . $cacheKey . '.ser';
        $srcMTime = filemtime($path) ?: 0;

        if (is_readable($cacheFile)) {
            $raw = @file_get_contents($cacheFile);
            // The cache holds only arrays and scalars, so refuse to instantiate
            // any class should the file have been tampered with.
            $data = $raw === false ? false : @unserialize($raw, ['allowed_classes' => false]);
            if (
                is_array($data)
                && isset($data['mtime'], $data['table'])
                && $data['mtime'] === $srcMTime
                && is_array($data['table'])
            ) {
                $this->tableCache[$cacheKey] = $data['table'];

                return $data['table'];
            }
        }

        $text = file_get_contents($path);
        if ($text === false) {
            throw new \RuntimeException("Unable to read data file: {$path}");
        }
        $table = $this->generate_markov_table($text, $look_forward);

        // Writing the cache is best-effort: a failure only costs a rebuild next time.
        try {
            @file_put_contents($cacheFile, serialize(['mtime' => $srcMTime, 'table' => $table]), LOCK_EX);
        } catch (\Throwable $e) {
            // ignore
        }

        $this->tableCache[$cacheKey] = $table;

        return $table;
    }

    /**
     * Turn a title into a URL stub: spaces become dashes, then lower-cased.
     *
     * @param string $title
     *
     * @return string
     */
    private function stubify(string $title): string
    {
        return strtolower(str_replace(' ', '-', $title));
    }

    /**
     * Generate a random title.
     *
     * Generates text, strips punctuation, and cuts at the first space found at
     * or after a random byte offset between 4 and 30.
     *
     * @return string
     */
    private function get_title(): string
    {
        $length = random_int(4, 30);
        $title = $this->generate($length * 10);
        $title = str_replace(['.', ',', '!', '?', '"', '—'], '', $title);

        // Clamp the offset: strpos() throws on PHP 8 when it exceeds the string length.
        $offset = min($length, strlen($title));
        $cut = strpos($title, ' ', $offset);
        // No space after the offset: keep the text rather than letting
        // substr($title, 0, false) collapse the title to ''.
        if ($cut !== false) {
            $title = substr($title, 0, $cut);
        }

        return ucwords($title);
    }

    /**
     * Generate a headline: about 100 bytes of text followed by '...'.
     *
     * @param string $stub Currently unused.
     *
     * @return string
     */
    private function get_headline(string $stub): string
    {
        return $this->generate(100) . '...';
    }

    /**
     * Generate article body content as paragraphs joined by blank lines.
     *
     * NOTE(review): the string literals below look like they originally held
     * markup that has been lost, and only paragraph 0 and every third
     * paragraph (index % 3 === 2) produce output. Behaviour is kept exactly as
     * found; confirm against the original template.
     *
     * @param string $stub Currently unused.
     *
     * @return string
     */
    private function getContent(string $stub): string
    {
        $paragraphCount = random_int(5, 16);
        $paragraphs = [];
        for ($i = 0; $i < $paragraphCount; $i++) {
            if ($i === 0) {
                $paragraphs[] = '
' . htmlspecialchars($this->generate(100)) . '
';
            } elseif ($i % 3 === 2) {
                $paragraphs[] = '' . htmlspecialchars($this->generate_code(500)) . '' . htmlspecialchars($this->generate(500)) . '
';
            }
        }

        return implode("\n\n", $paragraphs);
    }

    /**
     * Generate $count random article summaries.
     *
     * @param int $count Number of articles to generate.
     *
     * @return array List of ['title' => string, 'stub' => string, 'headline' => string].
     */
    public function getRandomArticles(int $count): array
    {
        $articles = [];
        for ($i = 0; $i < $count; $i++) {
            $title = $this->get_title();
            $stub = $this->stubify($title);
            $articles[] = [
                'title' => $title,
                'stub' => $stub,
                'headline' => $this->get_headline($stub)
            ];
        }

        return $articles;
    }

    /**
     * Build a full article for a stub.
     *
     * The title is derived from the stub (dashes to spaces, words capitalised);
     * headline and content are freshly generated on every call.
     *
     * @param string $stub
     *
     * @return array|null Always an array in the current implementation.
     */
    public function getByStub(string $stub): ?array
    {
        return [
            'title' => ucwords(str_replace('-', ' ', $stub)),
            'stub' => $stub,
            'headline' => ucfirst($this->get_headline($stub)),
            'content' => $this->getContent($stub)
        ];
    }
}