최고관리자 ·
2025.04.13 ·
조회 3
DSc Neural Language Engine
<p><br></p><p><br></p><div class="t2-code-block"><pre><code class="hljs xml" contenteditable="true"><span class="php"><span class="hljs-meta"><?php</span>
include_once('./_common.php');

// Read the "stx" query parameter. A non-string value (for example ?stx[]=a
// arrives as an array) is treated as missing, because trim() only accepts strings.
$stx = (isset($_GET['stx']) && is_string($_GET['stx'])) ? trim($_GET['stx']) : '';

header('Content-Type: application/json');

// Terms shorter than two characters (including the empty string) get an
// empty result and the script stops here.
if (mb_strlen($stx, 'UTF-8') < 2) {
    echo json_encode(['correction' => '', 'related' => []]);
    exit;
}
/**
 * Decode the first UTF-8 encoded character of a string into its Unicode code point.
 *
 * Only the leading character is examined; any bytes after it are ignored.
 *
 * @param string $c Bytes of at least one UTF-8 encoded character.
 * @return int|false The code point, or false when the input is not a string,
 *                   is empty, is truncated, or does not start with a
 *                   well-formed lead byte followed by continuation bytes.
 */
function uniord($c) {
    if (!is_string($c) || $c === '') {
        return false;
    }

    $h = ord($c[0]);
    if ($h <= 0x7F) {
        return $h; // single-byte ASCII
    }

    // The lead byte fixes the sequence length and carries the top payload bits.
    if ($h < 0xC2) {
        return false; // stray continuation byte or overlong two-byte lead
    } elseif ($h <= 0xDF) {
        $length = 2;
        $code = $h & 0x1F;
    } elseif ($h <= 0xEF) {
        $length = 3;
        $code = $h & 0x0F;
    } elseif ($h <= 0xF4) {
        $length = 4;
        $code = $h & 0x07;
    } else {
        return false; // 0xF5..0xFF never start a UTF-8 sequence
    }

    // Refuse truncated input instead of reading past the end of the string.
    if (strlen($c) < $length) {
        return false;
    }

    // Every following byte must look like 10xxxxxx and contributes six bits.
    for ($i = 1; $i < $length; $i++) {
        $byte = ord($c[$i]);
        if (($byte & 0xC0) !== 0x80) {
            return false;
        }
        $code = ($code << 6) | ($byte & 0x3F);
    }

    return $code;
}
/**
 * Replace every precomposed Hangul syllable in a string with its initial consonant.
 *
 * Characters outside the Hangul syllable range U+AC00..U+D7A3 (and characters
 * that uniord() cannot decode) are copied to the result unchanged.
 *
 * @param string $text UTF-8 text.
 * @return string The text with each Hangul syllable reduced to its initial consonant.
 */
function extractInitials($text) {
    // The 19 initial consonants, indexed by initial-consonant number.
    // Built once per call instead of once per character.
    $choseong = ['ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'];

    $initials = '';
    $text_len = mb_strlen($text, 'UTF-8');

    for ($i = 0; $i < $text_len; $i++) {
        $char = mb_substr($text, $i, 1, 'UTF-8');
        $code = uniord($char);

        if ($code !== false && $code >= 0xAC00 && $code <= 0xD7A3) {
            // Each initial consonant owns 21 vowels x 28 finals = 588 code points.
            $choseong_index = (int) (($code - 0xAC00) / 588);
            $initials .= $choseong[$choseong_index];
        } else {
            // Not a Hangul syllable: keep the character as it is.
            $initials .= $char;
        }
    }

    return $initials;
}
/**
 * Tell whether a Hangul syllable ends in a final consonant (batchim).
 *
 * @param string $char A single UTF-8 character.
 * @return bool True only for a syllable in U+AC00..U+D7A3 whose offset from
 *              U+AC00 is not a multiple of 28; false for everything else.
 */
function hasFinalConsonant($char) {
    $codePoint = uniord($char);
    $isSyllable = ($codePoint >= 0xAC00 && $codePoint <= 0xD7A3);

    // Within a syllable block the final consonant cycles every 28 code points;
    // slot 0 means "no final consonant".
    return $isSyllable ? (($codePoint - 0xAC00) % 28) !== 0 : false;
}
/**
 * Check whether a word ends with one of a fixed list of common Korean
 * particles / verb endings.
 *
 * A match only counts when at least two characters remain in front of the
 * matched ending, so short words are never reported.
 *
 * @param string $word UTF-8 word to inspect.
 * @return bool True when the word ends with a listed pattern and the
 *              remaining stem is two or more characters long.
 */
function hasParticleOrSuffix($word) {
    // Common particle / ending patterns (duplicate entries removed).
    static $common_patterns = [
        '이란', '이라는', '이라고', '이라면', '이었던', '이었다', '이었고',
        '으로', '으로써', '으로서', '으로는', '으로도',
        '에서', '에서는', '에서도', '에서만', '에서의',
        '라는', '라고', '라면', '라도', '라서',
        '에게', '에게는', '에게서', '에게도',
        '과의', '과는', '과도', '과가',
        '와의', '와는', '와도', '와가',
        '보다', '보다는', '보다도',
        '부터', '까지', '만큼', '처럼', '같이',
        '이다', '입니다', '이고', '이며', '이나', '이야', '이었',
        '하는', '하다', '했던', '했다', '했는', '했고', '하고', '하여', '하면', '해서',
        '된다', '됩니다', '되고', '되는', '되며', '되어', '되었',
        '인', '인데', '인지', '일까', '일듯',
        '이지', '이네', '이요', '이에요', '이예요', '이어요', '이잖아', '이구나', '이군요',
        '지만', '지요', '지는', '지도', '지를', '지가', '지의', '지와',
    ];

    // Loop-invariant: measure the word once.
    $wordLength = mb_strlen($word, 'UTF-8');

    foreach ($common_patterns as $pattern) {
        $patternLength = mb_strlen($pattern, 'UTF-8');

        // The stem left in front of the pattern must keep two or more characters.
        if ($wordLength - $patternLength < 2) {
            continue;
        }

        if (mb_substr($word, -$patternLength, null, 'UTF-8') === $pattern) {
            return true;
        }
    }

    return false;
}
/**
 * Strip trailing Korean particles, conjunctions and suffixes from a word.
 *
 * Affixes are tried longest first and removed repeatedly until none matches.
 * An affix is only removed when at least two characters remain. In addition:
 *   - '이', '은', '을', '으로' are removed only after a syllable that has a
 *     final consonant;
 *   - '가', '는', '를', '로' are removed only after a syllable that has none.
 * Words of two characters or fewer are returned untouched.
 *
 * @param string $word UTF-8 word.
 * @return string The word with trailing affixes removed (never shorter than
 *                two characters), or the input itself when nothing was removed.
 */
function removeKoreanParticles($word) {
    if (mb_strlen($word, 'UTF-8') <= 2) {
        return $word;
    }

    // The affix table is large and this function runs once per candidate word,
    // so build, de-duplicate and sort it only on the first call.
    static $affixes = null;
    if ($affixes === null) {
        // Particles, including compound forms.
        $particles = [
            '에게서', '한테서', '으로서', '으로써', '로써', '로서', '에서부터', '부터는', '까지는', '마저도',
            '조차도', '에게도', '한테도', '께서도', '에서는', '로는', '로써는', '만으로도', '만으로는',
            '에게는', '한테는', '께서는', '으로도', '로도', '과는', '와는', '만큼은', '처럼은', '보다는',
            '같이는', '마냥', '만은', '만이', '뿐만', '뿐이', '밖에', '같은', '처럼', '만큼', '만의', '같이',
            '에게', '한테', '께서', '으로', '에서', '부터', '까지', '마저', '조차', '마다', '보다', '이나',
            '만을', '만의', '만이', '에도', '이랑', '이며', '이자', '이고', '이든', '이야', '만도',
            '하고', '이며', '이랑', '이자', '이나', '이고', '이든', '이요',
            '은', '는', '이', '가', '을', '를', '의', '에', '과', '와', '로', '며', '나', '랑', '도', '만',
            '야', '든', '고', '요', '라', '면', '씩', '뿐', '서', '께',
            '들', '쯤',
            '이란', '이라는', '이라고', '이라면', '이었던', '이었다', '이었고',
            '라는', '라고', '라면', '라도', '라서',
            '이지', '이네', '이요', '이에요', '이예요', '이어요', '이잖아', '이구나', '이군요',
            '지만', '지요', '지는', '지도', '지를', '지가', '지의', '지를', '지와', '지는',
        ];

        // Conjunctions.
        $conjunctions = [
            '그리고', '그러나', '하지만', '또한', '그런데', '따라서', '그래서', '그러므로', '그렇지만',
            '하물며', '뿐만아니라', '게다가', '아울러', '더구나', '더욱이', '하여튼', '어쨌든', '결국',
            '그리하여', '따지고보면', '요컨대', '즉', '곧', '다시말해', '예컨대', '왜냐하면', '때문에',
        ];

        // Suffixes and verb endings.
        $suffixes = [
            '하다', '되다', '시키다', '당하다', '받다', '스럽다', '적', '화', '성', '형', '감', '력', '미', '법',
            '용', '론', '식', '술', '상', '증', '망', '계', '층', '권', '율', '가', '수', '원', '장', '사', '자',
            '님', '씨', '군', '양',
            '이다', '입니다', '이고', '이며', '이나', '이야', '이었',
            '하는', '하다', '했던', '했다', '했는', '했고', '하고', '하여', '하면', '해서',
            '된다', '됩니다', '되고', '되는', '되며', '되어', '되었',
            '인', '인데', '인지', '일까', '일듯',
        ];

        // Duplicates add nothing: at a given length a word has exactly one
        // ending, so at most one affix of that length can match it.
        $affixes = array_values(array_unique(array_merge($particles, $conjunctions, $suffixes)));

        // Longest first, so compound forms win over their shorter tails.
        usort($affixes, static function ($a, $b) {
            return mb_strlen($b, 'UTF-8') - mb_strlen($a, 'UTF-8');
        });
    }

    $changed = true;
    while ($changed) {
        $changed = false;
        $wordLength = mb_strlen($word, 'UTF-8');

        foreach ($affixes as $affix) {
            $affixLength = mb_strlen($affix, 'UTF-8');

            // Removing the affix must leave a stem of two or more characters.
            if ($wordLength - $affixLength < 2) {
                continue;
            }
            if (mb_substr($word, -$affixLength, null, 'UTF-8') !== $affix) {
                continue;
            }

            $stem = mb_substr($word, 0, $wordLength - $affixLength, 'UTF-8');
            $lastChar = mb_substr($stem, -1, 1, 'UTF-8');

            // These forms attach only to a stem ending in a final consonant.
            if (in_array($affix, ['이', '은', '을', '으로'], true) && !hasFinalConsonant($lastChar)) {
                continue;
            }
            // These forms attach only to a stem ending without one.
            if (in_array($affix, ['가', '는', '를', '로'], true) && hasFinalConsonant($lastChar)) {
                continue;
            }

            // Accept the shorter word and rescan from the longest affix again.
            $word = $stem;
            $changed = true;
            break;
        }
    }

    // Every accepted stem has at least two characters, so $word is either the
    // untouched input or a valid shortened form.
    return $word;
}
/**
 * Score how well the initial consonants of two words line up position by position.
 *
 * Both words are first reduced with extractInitials(); the reduced strings are
 * then compared character by character over the length of the shorter one.
 *
 * @param string $stx       The search term.
 * @param string $candidate The word to compare against.
 * @return int|float Matching positions divided by the length of the reduced
 *                   search term; 0 when either reduced string is empty.
 */
function calculateInitialSimilarity($stx, $candidate) {
    $queryInitials = extractInitials($stx);
    $otherInitials = extractInitials($candidate);

    $queryLength = mb_strlen($queryInitials, 'UTF-8');
    $otherLength = mb_strlen($otherInitials, 'UTF-8');

    // Nothing to compare.
    if ($queryLength == 0 || $otherLength == 0) {
        return 0;
    }

    // Count equal characters at equal positions; the walk direction does not
    // affect the count.
    $hits = 0;
    $position = min($queryLength, $otherLength);
    while ($position-- > 0) {
        $left = mb_substr($queryInitials, $position, 1, 'UTF-8');
        $right = mb_substr($otherInitials, $position, 1, 'UTF-8');
        if ($left === $right) {
            $hits++;
        }
    }

    // The ratio is relative to the search term, not to the candidate.
    return $hits / $queryLength;
}
/**
 * Similarity of two UTF-8 strings based on character-level Levenshtein distance.
 *
 * @param string $str1 First string.
 * @param string $str2 Second string.
 * @return int|float 1 - distance / max(length1, length2); 0 when either string
 *                   is empty or cannot be split as UTF-8.
 */
function calculateTextSimilarity($str1, $str2) {
    // Split into characters; preg_split() returns false on malformed UTF-8.
    $str1_chars = preg_split('//u', $str1, -1, PREG_SPLIT_NO_EMPTY);
    $str2_chars = preg_split('//u', $str2, -1, PREG_SPLIT_NO_EMPTY);
    if ($str1_chars === false || $str2_chars === false) {
        return 0;
    }

    // Take the lengths from the arrays that are actually indexed below.
    $str1_len = count($str1_chars);
    $str2_len = count($str2_chars);
    if ($str1_len === 0 || $str2_len === 0) {
        return 0;
    }

    // Only the previous and the current row of the distance matrix are needed.
    $previous = range(0, $str2_len);
    for ($i = 1; $i <= $str1_len; $i++) {
        $current = [$i];
        for ($j = 1; $j <= $str2_len; $j++) {
            $cost = ($str1_chars[$i - 1] === $str2_chars[$j - 1]) ? 0 : 1;
            $current[$j] = min(
                $previous[$j] + 1,        // deletion
                $current[$j - 1] + 1,     // insertion
                $previous[$j - 1] + $cost // substitution
            );
        }
        $previous = $current;
    }

    // Normalise the distance to a 0..1 similarity.
    $max_distance = max($str1_len, $str2_len);
    return 1 - ($previous[$str2_len] / $max_distance);
}
/**
 * Collect candidate words from the popular-search table and from board posts.
 *
 * Popular search words are taken as they are. From each listed board table,
 * up to 300 rows are read and every run of letters/digits of two or more
 * characters in the subject and in the tag-stripped content is added.
 *
 * @return array Unique candidate words (keys are those left by array_unique()).
 */
function getCandidates() {
    $candidates = [];

    // Appends every letter/digit run of at least two characters found in $text.
    $collectWords = static function ($text) use (&$candidates) {
        if (!preg_match_all('/[\pL\pN]+/u', (string) $text, $matches)) {
            return; // no match, or the text is not valid UTF-8
        }
        foreach ($matches[0] as $word) {
            if (mb_strlen($word, 'UTF-8') >= 2) {
                $candidates[] = $word;
            }
        }
    };

    // Popular search words.
    $sql = "SELECT pp_word FROM g5_popular ORDER BY pp_date DESC, pp_count DESC LIMIT 100";
    $result = sql_query($sql);
    // NOTE(review): sql_query() is defined outside this file; a falsy result is
    // assumed to mean the query failed - confirm against its implementation.
    if ($result) {
        while ($row = sql_fetch_array($result)) {
            $candidates[] = $row['pp_word'];
        }
    }

    // Words from board posts. The table names are fixed, so no request data
    // is interpolated into the SQL.
    $boards = ['g5_write_code', 'g5_write_free', 'g5_write_gallery', 'g5_write_it', 'g5_write_notice', 'g5_write_blog'];
    foreach ($boards as $board) {
        try {
            $sql = "SELECT wr_subject, wr_content FROM $board LIMIT 300";
            $result = sql_query($sql);
            if (!$result) {
                continue; // skip a board that could not be read
            }
            while ($row = sql_fetch_array($result)) {
                $collectWords($row['wr_subject']);
                $collectWords(strip_tags((string) $row['wr_content']));
            }
        } catch (Exception $e) {
            // Best effort: a failing board must not stop the others.
            continue;
        }
    }

    return array_unique($candidates);
}
/**
 * Filter a candidate list down to words that carry no particle or suffix.
 *
 * A candidate is dropped when hasParticleOrSuffix() reports an ending on it,
 * or on the form returned by removeKoreanParticles(), or when the cleaned
 * form is shorter than two characters. Entries that are not strings or
 * numbers (for example nested arrays from decoded JSON) are ignored.
 *
 * @param array $candidates Raw candidate values.
 * @return array List of unique cleaned words, each one a string.
 */
function getCleanCandidates($candidates) {
    $clean_candidates = [];

    foreach ($candidates as $term) {
        // Only text-like values can be examined by the mb_* helpers.
        if (!is_string($term) && !is_int($term) && !is_float($term)) {
            continue;
        }
        $term = (string) $term;

        // Skip words that still carry a particle or ending.
        if (hasParticleOrSuffix($term)) {
            continue;
        }

        $clean_term = removeKoreanParticles($term);

        // Skip when an ending is still present after stripping.
        if (hasParticleOrSuffix($clean_term)) {
            continue;
        }

        // Keep words of two or more characters; array keys de-duplicate them.
        if (mb_strlen($clean_term, 'UTF-8') >= 2) {
            $clean_candidates[$clean_term] = true;
        }
    }

    // PHP turns numeric-string keys such as '2025' into integers; convert
    // them back so every returned word is a string.
    return array_map('strval', array_keys($clean_candidates));
}
// Fetch related terms by calling search_suggest.php over HTTP.
$related_terms = [];
$response = false;
$ch = curl_init();
if ($ch !== false) {
    curl_setopt($ch, CURLOPT_URL, G5_BBS_URL . '/search_suggest.php?stx=' . urlencode($stx));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Bound the request so a slow or unreachable suggest endpoint cannot
    // hold this request open indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 2);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    $response = curl_exec($ch);
    curl_close($ch);
}
if (is_string($response) && $response !== '') {
    // Only an array payload can be merged into the candidate list below.
    $decoded = json_decode($response, true);
    if (is_array($decoded)) {
        $related_terms = $decoded;
    }
}

// Strip particles from the search term.
$clean_stx = removeKoreanParticles($stx);

// Gather every candidate word and reduce the list to clean forms.
$all_candidates = array_merge($related_terms, getCandidates());
$clean_candidates = getCleanCandidates($all_candidates);

// Check whether any candidate equals the cleaned search term (case-insensitive).
$exact_match_exists = false;
$clean_stx_lower = strtolower($clean_stx);
foreach ($clean_candidates as $term) {
    if (strtolower($term) === $clean_stx_lower) {
        $exact_match_exists = true;
        break;
    }
}

// Initialise the result variables.
$correction = '';
$related = [];
<span class="hljs-comment">// 한글이 포함된 경우에만 처리</span>
<span class="hljs-keyword">if</span> (preg_match(<span class="hljs-string">'/[\x{AC00}-\x{D7A3}]/u'</span>, $stx)) {
$stx_initials = extractInitials($clean_stx);
<span class="hljs-comment">// 초성이 일치하는 단어들 찾기</span>
$initial_matches = [];
<span class="hljs-keyword">foreach</span> ($clean_candidates <span class="hljs-keyword">as</span> $term) {
<span class="hljs-comment">// 자기 자신은 건너뛰기</span>
<span class="hljs-keyword">if</span> (strtolower($term) === strtolower($clean_stx)) {
<span class="hljs-keyword">continue</span>;
}
<span class="hljs-comment">// 초성 유사도 계산</span>
$similarity = calculateInitialSimilarity($clean_stx, $term);
<span class="hljs-keyword">if</span> ($similarity >= <span class="hljs-number">0.75</span>) { <span class="hljs-comment">// 75% 이상 일치</span>
$initial_matches[$term] = $similarity;
}
}
<span class="hljs-comment">// 정확히 일치하는 단어가 없는 경우: 맞춤법 교정 제안</span>
<span class="hljs-keyword">if</span> (!$exact_match_exists && !<span class="hljs-keyword">empty</span>($initial_matches)) {
<span class="hljs-comment">// 유사도 기준으로 정렬</span>
arsort($initial_matches);
$correction = key($initial_matches); <span class="hljs-comment">// 가장 유사도가 높은 첫 번째 단어</span>
}
<span class="hljs-comment">// 정확히 일치하는 단어가 있는 경우: 연관 검색어 제안</span>
<span class="hljs-keyword">if</span> ($exact_match_exists && !<span class="hljs-keyword">empty</span>($initial_matches)) {
<span class="hljs-comment">// 초성 기반 연관 검색어</span>
$related = array_keys($initial_matches);
}
<span class="hljs-comment">// 정확히 일치하는 단어도 없고 초성 일치하는 단어도 없는 경우: 텍스트 유사도 기반 추천</span>
<span class="hljs-keyword">if</span> (!$exact_match_exists && <span class="hljs-keyword">empty</span>($initial_matches)) {
$text_matches = [];
<span class="hljs-keyword">foreach</span> ($clean_candidates <span class="hljs-keyword">as</span> $term) {
$similarity = calculateTextSimilarity($clean_stx, $term);
<span class="hljs-keyword">if</span> ($similarity >= <span class="hljs-number">0.85</span>) { <span class="hljs-comment">// 85% 이상 일치</span>
$text_matches[$term] = $similarity;
}
}
<span class="hljs-comment">// 유사도 기준으로 정렬</span>
arsort($text_matches);
<span class="hljs-comment">// 연관 검색어 설정</span>
$related = array_keys($text_matches);
}
}
<span class="hljs-comment">// 맞춤법 교정이 있고 조사가 붙어있으면 제거</span>
<span class="hljs-keyword">if</span> (!<span class="hljs-keyword">empty</span>($correction) && hasParticleOrSuffix($correction)) {
$correction = removeKoreanParticles($correction);
<span class="hljs-comment">// 여전히 조사가 있으면 무시</span>
<span class="hljs-keyword">if</span> (hasParticleOrSuffix($correction)) {
$correction = <span class="hljs-string">''</span>;
}
}
<span class="hljs-comment">// 연관 검색어 필터링 (조사가 있거나 검색어와 동일한 경우 제외)</span>
$filtered_related = [];
<span class="hljs-keyword">foreach</span> ($related <span class="hljs-keyword">as</span> $term) {
<span class="hljs-keyword">if</span> (strtolower($term) !== strtolower($clean_stx) && !hasParticleOrSuffix($term)) {
$filtered_related[] = $term;
}
}
<span class="hljs-comment">// 결과 반환</span>
<span class="hljs-keyword">echo</span> json_encode([
<span class="hljs-string">'correction'</span> => $correction,
<span class="hljs-string">'related'</span> => array_slice($filtered_related, <span class="hljs-number">0</span>, <span class="hljs-number">5</span>) <span class="hljs-comment">// 최대 5개 연관 검색어 반환</span>
]);
<span class="hljs-meta">?></span></span></code><code class="hljs" contenteditable="true"><br></code></pre></div><p><br></p><p><br></p><div class="t2-code-block"><pre><code class="hljs xml" contenteditable="true"><span class="php"><span class="hljs-meta"><?php</span>
include_once('./_common.php');

// Read the `stx` query parameter. A non-string value (e.g. `?stx[]=a` arrives
// as an array) is treated as absent, because trim() on an array raises a
// TypeError on PHP 8.
$stx = (isset($_GET['stx']) && is_string($_GET['stx'])) ? trim($_GET['stx']) : '';

header('Content-Type: application/json');

// Queries shorter than two characters (including the empty string) get an
// empty result and the script stops here.
if (mb_strlen($stx, 'UTF-8') < 2) {
    echo json_encode(['correction' => '', 'related' => []]);
    exit;
}
/**
 * Decode the first UTF-8 encoded character of a string into its code point.
 *
 * Kept as a plain computation (no caching). Returns false for an empty
 * string, an invalid lead byte, a truncated sequence, or a byte that is not a
 * valid continuation byte, so callers never receive a code point assembled
 * from missing or malformed bytes.
 *
 * @param string $c Bytes of (at least) one UTF-8 encoded character.
 * @return int|false The code point, or false when the input cannot be decoded.
 */
function uniord($c) {
    $c = (string) $c;
    $len = strlen($c);
    if ($len === 0) {
        return false;
    }

    $h = ord($c[0]);
    if ($h <= 0x7F) {
        return $h; // single-byte (ASCII)
    }
    if ($h < 0xC2) {
        return false; // stray continuation byte or overlong 2-byte lead
    }

    // Determine the sequence length and the payload bits of the lead byte.
    if ($h <= 0xDF) {
        $size = 2;
        $code = $h & 0x1F;
    } elseif ($h <= 0xEF) {
        $size = 3;
        $code = $h & 0x0F;
    } elseif ($h <= 0xF4) {
        $size = 4;
        $code = $h & 0x07;
    } else {
        return false;
    }

    if ($len < $size) {
        return false; // truncated sequence
    }

    // Fold in six payload bits from each continuation byte (10xxxxxx).
    for ($i = 1; $i < $size; $i++) {
        $b = ord($c[$i]);
        if (($b & 0xC0) !== 0x80) {
            return false;
        }
        $code = ($code << 6) | ($b & 0x3F);
    }

    return $code;
}
/**
 * Replace every Hangul syllable in $text with its initial consonant (choseong).
 *
 * Characters outside the Hangul syllable range U+AC00..U+D7A3 are copied to
 * the result unchanged.
 *
 * @param string $text UTF-8 text.
 * @return string The text with each Hangul syllable reduced to its initial consonant.
 */
function extractInitials($text) {
    static $leadConsonants = ['ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'];

    $result = '';
    $length = mb_strlen($text, 'UTF-8');
    $pos = 0;
    while ($pos < $length) {
        $ch = mb_substr($text, $pos, 1, 'UTF-8');
        $cp = uniord($ch);
        $isSyllable = ($cp >= 0xAC00 && $cp <= 0xD7A3);
        // Each initial consonant spans 588 consecutive syllables (588 = 28 * 21).
        $result .= $isSyllable ? $leadConsonants[intval(($cp - 0xAC00) / 588)] : $ch;
        $pos++;
    }

    return $result;
}
/**
 * Tell whether a Hangul syllable ends in a final consonant (jongseong).
 *
 * @param string $char A single UTF-8 character.
 * @return bool True when $char is a Hangul syllable (U+AC00..U+D7A3) whose
 *              offset from U+AC00 is not a multiple of 28; false otherwise,
 *              including for every non-Hangul character.
 */
function hasFinalConsonant($char) {
    $cp = uniord($char);
    if ($cp < 0xAC00 || $cp > 0xD7A3) {
        return false;
    }
    return ($cp - 0xAC00) % 28 !== 0;
}
/**
 * Tell whether a word ends with one of the known particles/suffixes while
 * leaving a stem of at least two characters in front of it.
 *
 * Every pattern is tried: a pattern that matches but would leave a stem
 * shorter than two characters is skipped rather than ending the search, so a
 * shorter pattern further down the list can still match (e.g. "가이라는"
 * matches "라는" even though "이라는" leaves only a one-character stem).
 *
 * @param string $word UTF-8 word to inspect.
 * @return bool True when such a particle/suffix is found, false otherwise.
 */
function hasParticleOrSuffix($word) {
    // Static so the list is built once per request.
    static $patterns = [
        '이란', '이라는', '이라고', '이라면', '이었던', '이었다', '이었고', '으로', '으로써', '으로서', '으로는', '으로도',
        '에서', '에서는', '에서도', '에서만', '에서의', '라는', '라고', '라면', '라도', '라서', '에게', '에게는', '에게서',
        '에게도', '과의', '과는', '과도', '과가', '와의', '와는', '와도', '와가', '보다', '보다는', '보다도', '부터', '까지',
        '만큼', '처럼', '같이', '이다', '입니다', '이고', '이며', '이나', '이야', '이었', '하는', '하다', '했던', '했다',
        '했는', '했고', '하고', '하여', '하면', '해서', '된다', '됩니다', '되고', '되는', '되며', '되어', '되었', '인',
        '인데', '인지', '일까', '일듯', '이지', '이네', '이요', '이에요', '이예요', '이어요', '이잖아', '이구나', '이군요',
        '지만', '지요', '지는', '지도', '지를', '지가', '지의', '지와'
    ];

    $word_len = mb_strlen($word, 'UTF-8');
    foreach ($patterns as $pattern) {
        $p_len = mb_strlen($pattern, 'UTF-8');
        $stem_len = $word_len - $p_len;
        // The stem in front of the suffix must be at least two characters long.
        if ($stem_len < 2) {
            continue;
        }
        if (mb_substr($word, $stem_len, $p_len, 'UTF-8') === $pattern) {
            return true;
        }
    }

    return false;
}
<span class="hljs-comment">// 조사 및 접속사 제거 함수 (정적 배열 및 최소화된 루프)</span>
<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">removeKoreanParticles</span><span class="hljs-params">($word)</span> </span>{
<span class="hljs-keyword">if</span> (mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) <= <span class="hljs-number">2</span>) <span class="hljs-keyword">return</span> $word;
<span class="hljs-keyword">static</span> $affixes = <span class="hljs-keyword">null</span>;
<span class="hljs-keyword">if</span> ($affixes === <span class="hljs-keyword">null</span>) {
$affixes = array_merge(
[<span class="hljs-string">'에게서'</span>, <span class="hljs-string">'한테서'</span>, <span class="hljs-string">'으로서'</span>, <span class="hljs-string">'으로써'</span>, <span class="hljs-string">'로써'</span>, <span class="hljs-string">'로서'</span>, <span class="hljs-string">'에서부터'</span>, <span class="hljs-string">'부터는'</span>, <span class="hljs-string">'까지는'</span>, <span class="hljs-string">'마저도'</span>, <span class="hljs-string">'조차도'</span>, <span class="hljs-string">'에게도'</span>, <span class="hljs-string">'한테도'</span>, <span class="hljs-string">'께서도'</span>, <span class="hljs-string">'에서는'</span>, <span class="hljs-string">'로는'</span>, <span class="hljs-string">'로써는'</span>, <span class="hljs-string">'만으로도'</span>, <span class="hljs-string">'만으로는'</span>, <span class="hljs-string">'에게는'</span>, <span class="hljs-string">'한테는'</span>, <span class="hljs-string">'께서는'</span>, <span class="hljs-string">'으로도'</span>, <span class="hljs-string">'로도'</span>, <span class="hljs-string">'과는'</span>, <span class="hljs-string">'와는'</span>, <span class="hljs-string">'만큼은'</span>, <span class="hljs-string">'처럼은'</span>, <span class="hljs-string">'보다는'</span>, <span class="hljs-string">'같이는'</span>, <span class="hljs-string">'마냥'</span>, <span class="hljs-string">'만은'</span>, <span class="hljs-string">'만이'</span>, <span class="hljs-string">'뿐만'</span>, <span class="hljs-string">'뿐이'</span>, <span class="hljs-string">'밖에'</span>, <span class="hljs-string">'같은'</span>, <span class="hljs-string">'처럼'</span>, <span class="hljs-string">'만큼'</span>, <span class="hljs-string">'만의'</span>, <span class="hljs-string">'같이'</span>, <span class="hljs-string">'에게'</span>, <span class="hljs-string">'한테'</span>, <span class="hljs-string">'께서'</span>, <span class="hljs-string">'으로'</span>, <span class="hljs-string">'에서'</span>, <span class="hljs-string">'부터'</span>, <span class="hljs-string">'까지'</span>, <span class="hljs-string">'마저'</span>, <span class="hljs-string">'조차'</span>, <span 
class="hljs-string">'마다'</span>, <span class="hljs-string">'보다'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'만을'</span>, <span class="hljs-string">'만의'</span>, <span class="hljs-string">'만이'</span>, <span class="hljs-string">'에도'</span>, <span class="hljs-string">'이랑'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이자'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이든'</span>, <span class="hljs-string">'이야'</span>, <span class="hljs-string">'만도'</span>, <span class="hljs-string">'하고'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이랑'</span>, <span class="hljs-string">'이자'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이든'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'은'</span>, <span class="hljs-string">'는'</span>, <span class="hljs-string">'이'</span>, <span class="hljs-string">'가'</span>, <span class="hljs-string">'을'</span>, <span class="hljs-string">'를'</span>, <span class="hljs-string">'의'</span>, <span class="hljs-string">'에'</span>, <span class="hljs-string">'과'</span>, <span class="hljs-string">'와'</span>, <span class="hljs-string">'로'</span>, <span class="hljs-string">'며'</span>, <span class="hljs-string">'나'</span>, <span class="hljs-string">'랑'</span>, <span class="hljs-string">'도'</span>, <span class="hljs-string">'만'</span>, <span class="hljs-string">'야'</span>, <span class="hljs-string">'든'</span>, <span class="hljs-string">'고'</span>, <span class="hljs-string">'요'</span>, <span class="hljs-string">'라'</span>, <span class="hljs-string">'면'</span>, <span class="hljs-string">'씩'</span>, <span class="hljs-string">'뿐'</span>, <span class="hljs-string">'서'</span>, <span class="hljs-string">'께'</span>, <span class="hljs-string">'들'</span>, <span class="hljs-string">'쯤'</span>, <span class="hljs-string">'이란'</span>, <span class="hljs-string">'이라는'</span>, 
<span class="hljs-string">'이라고'</span>, <span class="hljs-string">'이라면'</span>, <span class="hljs-string">'이었던'</span>, <span class="hljs-string">'이었다'</span>, <span class="hljs-string">'이었고'</span>, <span class="hljs-string">'라는'</span>, <span class="hljs-string">'라고'</span>, <span class="hljs-string">'라면'</span>, <span class="hljs-string">'라도'</span>, <span class="hljs-string">'라서'</span>, <span class="hljs-string">'이지'</span>, <span class="hljs-string">'이네'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'이에요'</span>, <span class="hljs-string">'이예요'</span>, <span class="hljs-string">'이어요'</span>, <span class="hljs-string">'이잖아'</span>, <span class="hljs-string">'이구나'</span>, <span class="hljs-string">'이군요'</span>, <span class="hljs-string">'지만'</span>, <span class="hljs-string">'지요'</span>, <span class="hljs-string">'지는'</span>, <span class="hljs-string">'지도'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지가'</span>, <span class="hljs-string">'지의'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지와'</span>, <span class="hljs-string">'지는'</span>],
[<span class="hljs-string">'그리고'</span>, <span class="hljs-string">'그러나'</span>, <span class="hljs-string">'하지만'</span>, <span class="hljs-string">'또한'</span>, <span class="hljs-string">'그런데'</span>, <span class="hljs-string">'따라서'</span>, <span class="hljs-string">'그래서'</span>, <span class="hljs-string">'그러므로'</span>, <span class="hljs-string">'그렇지만'</span>, <span class="hljs-string">'하물며'</span>, <span class="hljs-string">'뿐만아니라'</span>, <span class="hljs-string">'게다가'</span>, <span class="hljs-string">'아울러'</span>, <span class="hljs-string">'더구나'</span>, <span class="hljs-string">'더욱이'</span>, <span class="hljs-string">'하여튼'</span>, <span class="hljs-string">'어쨌든'</span>, <span class="hljs-string">'결국'</span>, <span class="hljs-string">'그리하여'</span>, <span class="hljs-string">'따지고보면'</span>, <span class="hljs-string">'요컨대'</span>, <span class="hljs-string">'즉'</span>, <span class="hljs-string">'곧'</span>, <span class="hljs-string">'다시말해'</span>, <span class="hljs-string">'예컨대'</span>, <span class="hljs-string">'왜냐하면'</span>, <span class="hljs-string">'때문에'</span>],
[<span class="hljs-string">'하다'</span>, <span class="hljs-string">'되다'</span>, <span class="hljs-string">'시키다'</span>, <span class="hljs-string">'당하다'</span>, <span class="hljs-string">'받다'</span>, <span class="hljs-string">'스럽다'</span>, <span class="hljs-string">'적'</span>, <span class="hljs-string">'화'</span>, <span class="hljs-string">'성'</span>, <span class="hljs-string">'형'</span>, <span class="hljs-string">'감'</span>, <span class="hljs-string">'력'</span>, <span class="hljs-string">'미'</span>, <span class="hljs-string">'법'</span>, <span class="hljs-string">'용'</span>, <span class="hljs-string">'론'</span>, <span class="hljs-string">'식'</span>, <span class="hljs-string">'술'</span>, <span class="hljs-string">'상'</span>, <span class="hljs-string">'증'</span>, <span class="hljs-string">'망'</span>, <span class="hljs-string">'계'</span>, <span class="hljs-string">'층'</span>, <span class=