DSc Neural Language Engine update

비밀글로 보호된 게시글입니다

← 홈으로
Tak2 · 2025.04.14 · 조회 1
DSc Neural Language Engine update
<p><br></p><p><br></p><div class="t2-code-block"><pre><code class="" contenteditable="true">&lt;?php
/**
 * Search suggestion endpoint.
 *
 * Reads the `stx` query parameter and answers with a JSON object of the form
 * {"correction": string, "related": string[]}. Candidate words are collected
 * from the popular-search table and from recent board subjects, then ranked
 * against the query with a Hangul-aware similarity score. Results are cached
 * per query as small PHP files under G5_DATA_PATH/cache for one hour.
 */
include_once('./_common.php');

// Read the search term. A non-string value (e.g. ?stx[]=a) is treated as
// missing, because trim() would raise a TypeError on an array in PHP 8.
$stx = (isset($_GET['stx']) &amp;&amp; is_string($_GET['stx'])) ? trim($_GET['stx']) : '';

header('Content-Type: application/json; charset=UTF-8');

// Input validation: at least two characters of well-formed UTF-8. Malformed
// input would make the /u regular expressions and json_encode() fail later.
if ($stx === '' || !mb_check_encoding($stx, 'UTF-8') || mb_strlen($stx, 'UTF-8') &lt; 2) {
    echo json_encode(['correction' =&gt; '', 'related' =&gt; []]);
    exit;
}

// Cache settings. The key is an md5 of the query, so the file name never
// contains user-controlled characters.
$cache_key = 'search_suggest_' . md5($stx);
$cache_dir = G5_DATA_PATH . '/cache';
$cache_file = $cache_dir . '/' . $cache_key . '.php';
$cache_ttl = 3600; // one hour

// Serve from the cache while the entry is fresh.
if (file_exists($cache_file) &amp;&amp; (time() - filemtime($cache_file)) &lt; $cache_ttl) {
    $cached_data = include $cache_file;
    // include yields a non-array value when the file has no usable return
    // statement; only echo payloads that have the expected shape.
    if (is_array($cached_data) &amp;&amp; isset($cached_data['correction'], $cached_data['related'])) {
        echo json_encode($cached_data);
        exit;
    }
}

/**
 * Decode the first UTF-8 encoded character of a string to its code point.
 *
 * @param string $c A string whose first bytes form one UTF-8 character.
 * @return int|false The code point, or false for an empty, truncated or
 *                   invalid lead byte sequence.
 */
function uniord($c) {
    if (!is_string($c) || $c === '') {
        return false;
    }

    $n = strlen($c);
    $h = ord($c[0]);

    if ($h &lt;= 0x7F) {
        return $h;
    }
    if ($h &lt; 0xC2) {
        return false;
    }
    if ($h &lt;= 0xDF) {
        return ($n &gt;= 2)
            ? (($h &amp; 0x1F) &lt;&lt; 6) | (ord($c[1]) &amp; 0x3F)
            : false;
    }
    if ($h &lt;= 0xEF) {
        return ($n &gt;= 3)
            ? (($h &amp; 0x0F) &lt;&lt; 12) | ((ord($c[1]) &amp; 0x3F) &lt;&lt; 6) | (ord($c[2]) &amp; 0x3F)
            : false;
    }
    if ($h &lt;= 0xF4) {
        return ($n &gt;= 4)
            ? (($h &amp; 0x07) &lt;&lt; 18) | ((ord($c[1]) &amp; 0x3F) &lt;&lt; 12) | ((ord($c[2]) &amp; 0x3F) &lt;&lt; 6) | (ord($c[3]) &amp; 0x3F)
            : false;
    }

    return false;
}

/**
 * Replace every precomposed Hangul syllable with its initial consonant.
 * Characters outside U+AC00..U+D7A3 are copied through unchanged, so the
 * result has the same number of characters as the input.
 *
 * @param string $text UTF-8 text.
 * @return string
 */
function extractInitials($text) {
    static $choseong = ['ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'];

    $initials = '';
    $text_len = mb_strlen($text, 'UTF-8');

    for ($i = 0; $i &lt; $text_len; $i++) {
        $char = mb_substr($text, $i, 1, 'UTF-8');
        $code = uniord($char);

        if ($code !== false &amp;&amp; $code &gt;= 0xAC00 &amp;&amp; $code &lt;= 0xD7A3) {
            // 588 = 21 medial vowels * 28 final slots per initial consonant.
            $initials .= $choseong[intval(($code - 0xAC00) / 588)];
        } else {
            $initials .= $char;
        }
    }

    return $initials;
}

/**
 * Tell whether a character is a Hangul syllable that ends in a final consonant.
 *
 * @param string $char A single UTF-8 character.
 * @return bool
 */
function hasFinalConsonant($char) {
    $code = uniord($char);

    if ($code === false || $code &lt; 0xAC00 || $code &gt; 0xD7A3) {
        return false;
    }

    // Final-consonant index 0 means "no final consonant".
    return (($code - 0xAC00) % 28) !== 0;
}

/**
 * Tell whether a word ends in one of the listed particles or endings while
 * leaving a stem of at least two characters.
 *
 * Every pattern is tried: a long pattern that would leave a too-short stem
 * no longer hides a shorter pattern that also matches (for example
 * '이라는' versus '라는'), which mirrors how removeKoreanParticles() skips
 * affixes whose stem is too short.
 *
 * @param string $word UTF-8 word.
 * @return bool
 */
function hasParticleOrSuffix($word) {
    static $patterns = [
        '이란', '이라는', '이라고', '이라면', '이었던', '이었다', '이었고', '으로', '으로써', '으로서',
        '로는', '에서', '에서는', '에서도', '라는', '라고', '라면', '라도', '에게', '에게는',
        '에게서', '에게도', '과의', '과는', '과도', '와의', '와는', '와도', '보다', '보다는',
        '부터', '까지', '만큼', '처럼', '이다', '입니다', '이고', '이며', '이나', '하는',
        '하다', '했던', '했다', '했고', '하고', '된다', '됩니다', '되고', '되는', '되며',
        '되어', '되었', '인', '인데', '인지', '이지', '이네', '이요', '이에요', '이예요',
        '이어요', '지만', '지요', '지는', '지도', '지가'
    ];

    $word_len = mb_strlen($word, 'UTF-8');

    foreach ($patterns as $pattern) {
        $pattern_len = mb_strlen($pattern, 'UTF-8');

        // The stem must keep at least two characters for the match to count.
        if ($word_len - $pattern_len &lt; 2) {
            continue;
        }
        if (mb_substr($word, -$pattern_len, null, 'UTF-8') === $pattern) {
            return true;
        }
    }

    return false;
}

/**
 * Strip trailing particles and affixes from a word, repeatedly, as long as
 * the remaining stem keeps at least two characters.
 *
 * Words of two characters or fewer are returned untouched. Particles whose
 * form depends on the preceding syllable are only removed when the stem's
 * last syllable agrees with them.
 *
 * @param string $word UTF-8 word.
 * @return string The stripped word, or the original when nothing was removed.
 */
function removeKoreanParticles($word) {
    static $affixes = [
        '에게서', '한테서', '으로서', '으로써', '부터는', '까지는', '에게도', '한테도', '에서는', '로도',
        '에게는', '한테는', '으로도', '과는', '와는', '보다는', '같이는', '만큼', '같이', '에게',
        '한테', '으로', '에서', '부터', '까지', '보다', '이나', '이랑', '이며', '이고',
        '은', '는', '이', '가', '을', '를', '의', '에', '과', '와',
        '로', '만', '도', '들', '이란', '이라는', '이라고', '이라면', '라는', '라고',
        '라면', '라도', '이지', '이네', '이요', '이에요', '이예요', '이어요', '지만', '지요',
        '그리고', '그러나', '하지만', '또한', '그런데', '따라서', '그래서', '하다', '되다', '적',
        '화', '성', '인', '인데', '인지'
    ];
    // Forms used after a syllable with a final consonant / without one.
    static $after_consonant = ['이', '은', '을', '으로'];
    static $after_vowel = ['가', '는', '를', '로'];

    $word_len = mb_strlen($word, 'UTF-8');
    if ($word_len &lt;= 2) {
        return $word;
    }

    $original = $word;

    do {
        $changed = false;

        foreach ($affixes as $affix) {
            $affix_len = mb_strlen($affix, 'UTF-8');
            if ($word_len &lt;= $affix_len) {
                continue;
            }
            if (mb_substr($word, -$affix_len, null, 'UTF-8') !== $affix) {
                continue;
            }

            $stem = mb_substr($word, 0, $word_len - $affix_len, 'UTF-8');
            if (mb_strlen($stem, 'UTF-8') &lt; 2) {
                continue;
            }

            $stem_closed = hasFinalConsonant(mb_substr($stem, -1, 1, 'UTF-8'));
            if (in_array($affix, $after_consonant, true) &amp;&amp; !$stem_closed) {
                continue;
            }
            if (in_array($affix, $after_vowel, true) &amp;&amp; $stem_closed) {
                continue;
            }

            // Accept the stem and rescan the affix list from the start.
            $word = $stem;
            $word_len = mb_strlen($word, 'UTF-8');
            $changed = true;
            break;
        }
    } while ($changed);

    return ($word !== $original &amp;&amp; $word_len &gt;= 2) ? $word : $original;
}

/**
 * Score how similar two words are, in the range 0..1.
 *
 * The score blends the weighted edit distance of the words (40%), the edit
 * distance of their initial-consonant strings (30%) and a position-weighted
 * match of those initials (30%). Words whose lengths differ by more than a
 * factor of two get a flat 0.1.
 *
 * @param string $word1 UTF-8 word.
 * @param string $word2 UTF-8 word.
 * @return float
 */
function calculateOverallSimilarity($word1, $word2) {
    // Multibyte-aware case folding; strtolower() works on bytes only.
    if (mb_strtolower($word1, 'UTF-8') === mb_strtolower($word2, 'UTF-8')) {
        return 1.0;
    }

    $len1 = mb_strlen($word1, 'UTF-8');
    $len2 = mb_strlen($word2, 'UTF-8');
    $longest = max($len1, $len2);
    $shortest = min($len1, $len2);

    if ($shortest / $longest &lt; 0.5) {
        return 0.1;
    }

    // Length of the common prefix, in characters.
    $prefix_len = 0;
    while ($prefix_len &lt; $shortest
        &amp;&amp; mb_substr($word1, $prefix_len, 1, 'UTF-8') === mb_substr($word2, $prefix_len, 1, 'UTF-8')) {
        $prefix_len++;
    }

    $lev_distance = improved_levenshtein($word1, $word2);
    $text_similarity = 1 - ($lev_distance / $longest);

    $initials1 = extractInitials($word1);
    $initials2 = extractInitials($word2);
    $init_len1 = mb_strlen($initials1, 'UTF-8');
    $init_len2 = mb_strlen($initials2, 'UTF-8');
    $init_lev = improved_levenshtein($initials1, $initials2);
    $initial_similarity = 1 - ($init_lev / max($init_len1, $init_len2));

    // Position-weighted agreement of initials: the first two positions count more.
    $position_score = 0;
    $total_positions = min($init_len1, $init_len2);
    if ($total_positions &gt; 0) {
        $matches = 0;
        $weight_sum = 0;

        for ($i = 0; $i &lt; $total_positions; $i++) {
            $weight = ($i == 0) ? 3.0 : ($i == 1 ? 2.0 : 1.0);
            $weight_sum += $weight;

            if (mb_substr($initials1, $i, 1, 'UTF-8') === mb_substr($initials2, $i, 1, 'UTF-8')) {
                $matches += $weight;
            }
        }

        $position_score = $matches / $weight_sum;
    }

    $final_score = ($text_similarity * 0.4) + ($initial_similarity * 0.3) + ($position_score * 0.3);

    // Same length, shared two-character prefix and a small distance: treat as a near match.
    if ($prefix_len &gt;= 2 &amp;&amp; $len1 == $len2 &amp;&amp; $lev_distance &lt;= 2) {
        $final_score = max($final_score, 0.85);
    }

    return min($final_score, 1.0);
}

/**
 * Character-level Levenshtein distance with a reduced substitution cost
 * between Hangul syllables that share an initial consonant (-0.3) and/or a
 * medial vowel (-0.4).
 *
 * @param string $str1 UTF-8 string.
 * @param string $str2 UTF-8 string.
 * @return int|float
 */
function improved_levenshtein($str1, $str2) {
    $chars1 = preg_split('//u', $str1, -1, PREG_SPLIT_NO_EMPTY);
    $chars2 = preg_split('//u', $str2, -1, PREG_SPLIT_NO_EMPTY);

    // preg_split() returns false on malformed UTF-8; treat that as empty
    // instead of indexing into a boolean below.
    if ($chars1 === false) {
        $chars1 = [];
    }
    if ($chars2 === false) {
        $chars2 = [];
    }

    // Take the lengths from the split arrays so they always agree with the indexes used.
    $len1 = count($chars1);
    $len2 = count($chars2);

    if ($len1 == 0) {
        return $len2;
    }
    if ($len2 == 0) {
        return $len1;
    }

    $matrix = array_fill(0, $len1 + 1, array_fill(0, $len2 + 1, 0));
    for ($i = 0; $i &lt;= $len1; $i++) {
        $matrix[$i][0] = $i;
    }
    for ($j = 0; $j &lt;= $len2; $j++) {
        $matrix[0][$j] = $j;
    }

    for ($i = 1; $i &lt;= $len1; $i++) {
        for ($j = 1; $j &lt;= $len2; $j++) {
            $cost = ($chars1[$i - 1] === $chars2[$j - 1]) ? 0 : 1.0;

            if ($cost &gt; 0) {
                $code1 = uniord($chars1[$i - 1]);
                $code2 = uniord($chars2[$j - 1]);

                if ($code1 &gt;= 0xAC00 &amp;&amp; $code1 &lt;= 0xD7A3 &amp;&amp; $code2 &gt;= 0xAC00 &amp;&amp; $code2 &lt;= 0xD7A3) {
                    $cho1 = intval(($code1 - 0xAC00) / 588);
                    $cho2 = intval(($code2 - 0xAC00) / 588);
                    $jung1 = intval(($code1 - 0xAC00) / 28) % 21;
                    $jung2 = intval(($code2 - 0xAC00) / 28) % 21;

                    if ($cho1 === $cho2) {
                        $cost -= 0.3;
                    }
                    if ($jung1 === $jung2) {
                        $cost -= 0.4;
                    }
                }
            }

            $matrix[$i][$j] = min(
                $matrix[$i - 1][$j] + 1,
                $matrix[$i][$j - 1] + 1,
                $matrix[$i - 1][$j - 1] + $cost
            );
        }
    }

    return $matrix[$len1][$len2];
}

/**
 * Collect candidate words: the popular search words of the last seven days
 * plus every word of two or more characters found in the 20 most recent
 * subjects of each listed board.
 *
 * @return array Unique candidate words (keys are not re-indexed).
 */
function getCandidates() {
    $candidates = [];

    // Popular search words.
    $sql = "SELECT pp_word FROM g5_popular WHERE pp_date &gt;= DATE_SUB(CURDATE(), INTERVAL 7 DAY) ORDER BY pp_count DESC LIMIT 50";
    $result = sql_query($sql);
    if ($result) {
        while ($row = sql_fetch_array($result)) {
            $candidates[] = $row['pp_word'];
        }
    }

    // Board subjects. The table names come from this fixed list only.
    $boards = ['g5_write_code', 'g5_write_gallery', 'g5_write_free', 'g5_write_it', 'g5_write_zip', 'g5_write_blog'];
    foreach ($boards as $board) {
        $sql = "SELECT wr_subject FROM $board WHERE wr_is_comment = 0 ORDER BY wr_datetime DESC LIMIT 20";
        $result = sql_query($sql);
        if (!$result) {
            // Skip a board whose query did not produce a result set.
            continue;
        }

        while ($row = sql_fetch_array($result)) {
            // Split the subject into runs of letters and digits.
            if (!preg_match_all('/[\pL\pN]+/u', $row['wr_subject'], $matches)) {
                continue;
            }
            foreach ($matches[0] as $word) {
                if (mb_strlen($word, 'UTF-8') &gt;= 2) {
                    $candidates[] = $word;
                }
            }
        }
    }

    return array_unique($candidates);
}

/**
 * Drop candidates that still carry a particle or ending, strip particles
 * from the rest and order them so that words with the same length and the
 * same initial consonants as the query come first.
 *
 * @param array  $candidates Raw candidate words.
 * @param string $clean_stx  The query after particle removal.
 * @return array List of unique cleaned candidate strings.
 */
function getCleanCandidates($candidates, $clean_stx) {
    $stx_initials = extractInitials($clean_stx);
    $stx_len = mb_strlen($clean_stx, 'UTF-8');

    $priority = [];
    $others = [];

    foreach ($candidates as $term) {
        $term = (string) $term;
        if (hasParticleOrSuffix($term)) {
            continue;
        }

        $clean_term = removeKoreanParticles($term);
        $term_len = mb_strlen($clean_term, 'UTF-8');
        if ($term_len &lt; 2) {
            continue;
        }

        if ($term_len === $stx_len &amp;&amp; extractInitials($clean_term) === $stx_initials) {
            $priority[$clean_term] = true;
        } else {
            $others[$clean_term] = true;
        }
    }

    // Numeric-looking words become integer array keys; hand strings back.
    return array_map('strval', array_merge(array_keys($priority), array_keys($others)));
}

// Main flow.
$clean_stx = removeKoreanParticles($stx);
$candidates = getCandidates();
$clean_candidates = getCleanCandidates($candidates, $clean_stx);
$folded_stx = mb_strtolower($clean_stx, 'UTF-8');

$exact_match = false;
foreach ($clean_candidates as $term) {
    if (mb_strtolower($term, 'UTF-8') === $folded_stx) {
        $exact_match = true;
        break;
    }
}

$correction = '';
$related_with_scores = []; // term =&gt; similarity score

// Suggestions are only computed for queries that contain a Hangul syllable.
if (preg_match('/[\x{AC00}-\x{D7A3}]/u', $stx)) {
    foreach ($clean_candidates as $term) {
        if (mb_strtolower($term, 'UTF-8') === $folded_stx) {
            continue;
        }

        $similarity = calculateOverallSimilarity($clean_stx, $term);
        if ($similarity &gt;= 0.5 &amp;&amp; !hasParticleOrSuffix($term)) {
            $related_with_scores[$term] = $similarity;
        }
    }

    // Highest similarity first.
    arsort($related_with_scores);
}

// Ranked terms as strings (array keys may have been cast to int).
$ranked_terms = array_map('strval', array_keys($related_with_scores));

// Spelling correction: offered only when the query itself is not a known word.
if (!$exact_match &amp;&amp; !empty($ranked_terms)) {
    $stx_len = mb_strlen($clean_stx, 'UTF-8');

    // Prefer the best term of the same length with a score of at least 0.7.
    foreach ($ranked_terms as $term) {
        if ($related_with_scores[$term] &gt;= 0.7 &amp;&amp; mb_strlen($term, 'UTF-8') === $stx_len) {
            $correction = $term;
            break;
        }
    }

    // Otherwise fall back to the top-ranked term, read explicitly rather
    // than through the array's internal pointer.
    if ($correction === '') {
        $correction = $ranked_terms[0];
    }
}

// Related searches: the five most similar terms.
$related = array_slice($ranked_terms, 0, 5);

$result = [
    'correction' =&gt; $correction,
    'related' =&gt; $related
];

// Store the result. var_export() emits a quoted PHP literal, so candidate
// text cannot inject code into the cache file. The write is skipped when the
// directory is not writable, so no warning is printed ahead of the JSON body.
$cache_data = '&lt;?php return ' . var_export($result, true) . '; ?&gt;';
if (is_dir($cache_dir) &amp;&amp; is_writable($cache_dir)) {
    file_put_contents($cache_file, $cache_data, LOCK_EX);
}

echo json_encode($result);
?&gt;</code></pre></div><p><br></p><style>.t2-media-block img, .t2-media-block iframe, .t2-media-block video {border-radius: 15px !important; border: none !important; margin: 0 auto !important;}.file-container {width: 360px; background: white; border-radius: 12px; border: 1px solid #4a4a4a; padding: 20px; display: flex; align-items: center; font-family: Roboto, Arial, sans-serif; margin: 10px 0;}.file-icon {width: 42px; height: 52px; border-radius: 6px; margin-right: 20px; position: relative; flex-shrink: 0; overflow: hidden;}.file-info {flex-grow: 1; min-width: 0;}.file-name {font-size: 17px; font-weight: 500; color: rgba(0,0,0,0.87); margin: 0 0 6px 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}.file-details {color: rgba(0,0,0,0.6); font-size: 14px; line-height: 1.5;}.file-details span {display: inline-block; margin-right: 12px;}.t2-table {width: 100%; border-collapse: collapse; margin: 15px 0; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;}.t2-table th, .t2-table td {border: 1px solid #ccc; padding: 8px; vertical-align: top;}.t2-table th {background-color: #f5f5f5; font-weight: 500;}.table-responsive {display: block; width: 100%; overflow-x: auto; margin-bottom: 1rem; -webkit-overflow-scrolling: touch;}.t2-table.t2-table-large {min-width: 800px;}</style>