DSc Neural Language Engine

비밀글로 보호된 게시글입니다

← 홈으로
최고관리자 · 2025.04.13 · 조회 3
DSc Neural Language Engine
<p><br></p><p><br></p><div class="t2-code-block"><pre><code class="hljs xml" contenteditable="true"><span class="php"><span class="hljs-meta">&lt;?php</span> <span class="hljs-keyword">include_once</span>(<span class="hljs-string">'./_common.php'</span>); <span class="hljs-comment">// stx 파라미터 값 처리</span> $stx = <span class="hljs-keyword">isset</span>($_GET[<span class="hljs-string">'stx'</span>]) ? trim($_GET[<span class="hljs-string">'stx'</span>]) : <span class="hljs-string">''</span>; header(<span class="hljs-string">'Content-Type: application/json'</span>); <span class="hljs-keyword">if</span> (<span class="hljs-keyword">empty</span>($stx) || mb_strlen($stx, <span class="hljs-string">'UTF-8'</span>) &lt; <span class="hljs-number">2</span>) { <span class="hljs-keyword">echo</span> json_encode([<span class="hljs-string">'correction'</span> =&gt; <span class="hljs-string">''</span>, <span class="hljs-string">'related'</span> =&gt; []]); <span class="hljs-keyword">exit</span>; } <span class="hljs-comment">// 한글 문자를 위한 유니코드 변환 함수</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">uniord</span><span class="hljs-params">($c)</span> </span>{ $h = ord($c[<span class="hljs-number">0</span>]); <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0x7F</span>) { <span class="hljs-keyword">return</span> $h; } <span class="hljs-keyword">else</span> <span class="hljs-keyword">if</span> ($h &lt; <span class="hljs-number">0xC2</span>) { <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>; } <span class="hljs-keyword">else</span> <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0xDF</span>) { <span class="hljs-keyword">return</span> (($h &amp; <span class="hljs-number">0x1F</span>) &lt;&lt; <span class="hljs-number">6</span>) | (ord($c[<span class="hljs-number">1</span>]) &amp; <span class="hljs-number">0x3F</span>); } <span class="hljs-keyword">else</span> <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0xEF</span>) { <span class="hljs-keyword">return</span> (($h &amp; <span class="hljs-number">0x0F</span>) &lt;&lt; <span class="hljs-number">12</span>) | ((ord($c[<span class="hljs-number">1</span>]) &amp; <span class="hljs-number">0x3F</span>) &lt;&lt; <span class="hljs-number">6</span>) | (ord($c[<span class="hljs-number">2</span>]) &amp; <span class="hljs-number">0x3F</span>); } <span class="hljs-keyword">else</span> <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0xF4</span>) { <span class="hljs-keyword">return</span> (($h &amp; <span class="hljs-number">0x07</span>) &lt;&lt; <span class="hljs-number">18</span>) | ((ord($c[<span class="hljs-number">1</span>]) &amp; <span class="hljs-number">0x3F</span>) &lt;&lt; <span class="hljs-number">12</span>) | ((ord($c[<span class="hljs-number">2</span>]) &amp; <span class="hljs-number">0x3F</span>) &lt;&lt; <span class="hljs-number">6</span>) | (ord($c[<span class="hljs-number">3</span>]) &amp; <span class="hljs-number">0x3F</span>); } <span class="hljs-keyword">else</span> { <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>; } } <span class="hljs-comment">/** * 한글 문자열에서 초성만 추출하는 함수 */</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">extractInitials</span><span class="hljs-params">($text)</span> </span>{ $initials = <span class="hljs-string">''</span>; $text_len = mb_strlen($text, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">for</span> ($i = <span class="hljs-number">0</span>; $i &lt; $text_len; $i++) { $char = mb_substr($text, $i, <span class="hljs-number">1</span>, <span class="hljs-string">'UTF-8'</span>); $code = uniord($char); <span class="hljs-comment">// 한글 유니코드 범위 (AC00-D7A3)</span> <span class="hljs-keyword">if</span> ($code &gt;= <span class="hljs-number">0xAC00</span> &amp;&amp; $code &lt;= <span class="hljs-number">0xD7A3</span>) { <span class="hljs-comment">// 초성 추출 공식: ((문자코드 - 0xAC00) / 28) / 21</span> $choseong_index = intval(($code - <span class="hljs-number">0xAC00</span>) / <span class="hljs-number">28</span> / <span class="hljs-number">21</span>); <span class="hljs-comment">// 초성 배열 (19개)</span> $choseong = <span class="hljs-keyword">array</span>(<span class="hljs-string">'ㄱ'</span>, <span class="hljs-string">'ㄲ'</span>, <span class="hljs-string">'ㄴ'</span>, <span class="hljs-string">'ㄷ'</span>, <span class="hljs-string">'ㄸ'</span>, <span class="hljs-string">'ㄹ'</span>, <span class="hljs-string">'ㅁ'</span>, <span class="hljs-string">'ㅂ'</span>, <span class="hljs-string">'ㅃ'</span>, <span class="hljs-string">'ㅅ'</span>, <span class="hljs-string">'ㅆ'</span>, <span class="hljs-string">'ㅇ'</span>, <span class="hljs-string">'ㅈ'</span>, <span class="hljs-string">'ㅉ'</span>, <span class="hljs-string">'ㅊ'</span>, <span class="hljs-string">'ㅋ'</span>, <span class="hljs-string">'ㅌ'</span>, <span class="hljs-string">'ㅍ'</span>, <span class="hljs-string">'ㅎ'</span>); <span class="hljs-comment">// 초성 추가</span> $initials .= $choseong[$choseong_index]; } <span class="hljs-keyword">else</span> { <span class="hljs-comment">// 한글이 아닌 경우 그대로 추가</span> $initials .= $char; } } <span class="hljs-keyword">return</span> $initials; } <span class="hljs-comment">/** * 한글 글자에 받침(종성)이 존재하는지 검사 */</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">hasFinalConsonant</span><span class="hljs-params">($char)</span> </span>{ $code = uniord($char); <span class="hljs-keyword">if</span> ($code &gt;= <span class="hljs-number">0xAC00</span> &amp;&amp; $code &lt;= <span class="hljs-number">0xD7A3</span>) { <span class="hljs-keyword">return</span> (($code - <span class="hljs-number">0xAC00</span>) % <span class="hljs-number">28</span>) !== <span class="hljs-number">0</span>; } <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>; } <span class="hljs-comment">/** * 한국어 단어에서 조사나 접미사가 붙어있는지 확인 * 자주 발생하는 특정 패턴에 대한 검사 추가 */</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">hasParticleOrSuffix</span><span class="hljs-params">($word)</span> </span>{ <span class="hljs-comment">// 흔한 조사/접미사 패턴 목록</span> $common_patterns = [ <span class="hljs-string">'이란'</span>, <span class="hljs-string">'이라는'</span>, <span class="hljs-string">'이라고'</span>, <span class="hljs-string">'이라면'</span>, <span class="hljs-string">'이었던'</span>, <span class="hljs-string">'이었다'</span>, <span class="hljs-string">'이었고'</span>, <span class="hljs-string">'으로'</span>, <span class="hljs-string">'으로써'</span>, <span class="hljs-string">'으로서'</span>, <span class="hljs-string">'으로는'</span>, <span class="hljs-string">'으로도'</span>, <span class="hljs-string">'에서'</span>, <span class="hljs-string">'에서는'</span>, <span class="hljs-string">'에서도'</span>, <span class="hljs-string">'에서만'</span>, <span class="hljs-string">'에서의'</span>, <span class="hljs-string">'라는'</span>, <span class="hljs-string">'라고'</span>, <span class="hljs-string">'라면'</span>, <span class="hljs-string">'라도'</span>, <span class="hljs-string">'라서'</span>, <span class="hljs-string">'에게'</span>, <span class="hljs-string">'에게는'</span>, <span class="hljs-string">'에게서'</span>, <span class="hljs-string">'에게도'</span>, <span class="hljs-string">'과의'</span>, <span class="hljs-string">'과는'</span>, <span class="hljs-string">'과도'</span>, <span class="hljs-string">'과가'</span>, <span class="hljs-string">'와의'</span>, <span class="hljs-string">'와는'</span>, <span class="hljs-string">'와도'</span>, <span class="hljs-string">'와가'</span>, <span class="hljs-string">'보다'</span>, <span class="hljs-string">'보다는'</span>, <span class="hljs-string">'보다도'</span>, <span class="hljs-string">'부터'</span>, <span class="hljs-string">'까지'</span>, <span class="hljs-string">'만큼'</span>, <span class="hljs-string">'처럼'</span>, <span class="hljs-string">'같이'</span>, <span class="hljs-string">'이다'</span>, <span class="hljs-string">'입니다'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'이야'</span>, <span class="hljs-string">'이었'</span>, <span class="hljs-string">'하는'</span>, <span class="hljs-string">'하다'</span>, <span class="hljs-string">'했던'</span>, <span class="hljs-string">'했다'</span>, <span class="hljs-string">'했는'</span>, <span class="hljs-string">'했고'</span>, <span class="hljs-string">'하고'</span>, <span class="hljs-string">'하여'</span>, <span class="hljs-string">'하면'</span>, <span class="hljs-string">'해서'</span>, <span class="hljs-string">'된다'</span>, <span class="hljs-string">'됩니다'</span>, <span class="hljs-string">'되고'</span>, <span class="hljs-string">'되는'</span>, <span class="hljs-string">'되며'</span>, <span class="hljs-string">'되어'</span>, <span class="hljs-string">'되었'</span>, <span class="hljs-string">'인'</span>, <span class="hljs-string">'인데'</span>, <span class="hljs-string">'인지'</span>, <span class="hljs-string">'일까'</span>, <span class="hljs-string">'일듯'</span>, <span class="hljs-string">'이지'</span>, <span class="hljs-string">'이네'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'이에요'</span>, <span class="hljs-string">'이예요'</span>, <span class="hljs-string">'이어요'</span>, <span class="hljs-string">'이잖아'</span>, <span class="hljs-string">'이구나'</span>, <span class="hljs-string">'이군요'</span>, <span class="hljs-string">'지만'</span>, <span class="hljs-string">'지요'</span>, <span class="hljs-string">'지는'</span>, <span class="hljs-string">'지도'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지가'</span>, <span class="hljs-string">'지의'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지와'</span>, <span class="hljs-string">'지는'</span> ]; <span class="hljs-keyword">foreach</span> ($common_patterns <span class="hljs-keyword">as</span> $pattern) { <span class="hljs-keyword">if</span> (mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) &gt; mb_strlen($pattern, <span class="hljs-string">'UTF-8'</span>)) { $suffix = mb_substr($word, -mb_strlen($pattern, <span class="hljs-string">'UTF-8'</span>), <span class="hljs-keyword">null</span>, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> ($suffix === $pattern) { <span class="hljs-comment">// 특정 단어에 대한 예외 처리 (예: "대한민국"에서 "한국"은 조사가 아님)</span> $stem = mb_substr($word, <span class="hljs-number">0</span>, mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) - mb_strlen($pattern, <span class="hljs-string">'UTF-8'</span>), <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> (mb_strlen($stem, <span class="hljs-string">'UTF-8'</span>) &gt;= <span class="hljs-number">2</span>) { <span class="hljs-keyword">return</span> <span class="hljs-keyword">true</span>; } } } } <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>; } <span class="hljs-comment">/** * 한국어 조사 및 접속사 제거 함수 * - 조사, 접속사 목록을 확장하고, 단어 끝의 affix를 반복적으로 제거함 */</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">removeKoreanParticles</span><span class="hljs-params">($word)</span> </span>{ <span class="hljs-keyword">if</span> (mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) &lt;= <span class="hljs-number">2</span>) { <span class="hljs-keyword">return</span> $word; } <span class="hljs-comment">// 포괄적인 조사 목록 (복합 형태 포함)</span> $particles = [ <span class="hljs-string">'에게서'</span>, <span class="hljs-string">'한테서'</span>, <span class="hljs-string">'으로서'</span>, <span class="hljs-string">'으로써'</span>, <span class="hljs-string">'로써'</span>, <span class="hljs-string">'로서'</span>, <span class="hljs-string">'에서부터'</span>, <span class="hljs-string">'부터는'</span>, <span class="hljs-string">'까지는'</span>, <span class="hljs-string">'마저도'</span>, <span class="hljs-string">'조차도'</span>, <span class="hljs-string">'에게도'</span>, <span class="hljs-string">'한테도'</span>, <span class="hljs-string">'께서도'</span>, <span class="hljs-string">'에서는'</span>, <span class="hljs-string">'로는'</span>, <span class="hljs-string">'로써는'</span>, <span class="hljs-string">'만으로도'</span>, <span class="hljs-string">'만으로는'</span>, <span class="hljs-string">'에게는'</span>, <span class="hljs-string">'한테는'</span>, <span class="hljs-string">'께서는'</span>, <span class="hljs-string">'으로도'</span>, <span class="hljs-string">'로도'</span>, <span class="hljs-string">'과는'</span>, <span class="hljs-string">'와는'</span>, <span class="hljs-string">'만큼은'</span>, <span class="hljs-string">'처럼은'</span>, <span class="hljs-string">'보다는'</span>, <span class="hljs-string">'같이는'</span>, <span class="hljs-string">'마냥'</span>, <span class="hljs-string">'만은'</span>, <span class="hljs-string">'만이'</span>, <span class="hljs-string">'뿐만'</span>, <span class="hljs-string">'뿐이'</span>, <span class="hljs-string">'밖에'</span>, <span class="hljs-string">'같은'</span>, <span class="hljs-string">'처럼'</span>, <span class="hljs-string">'만큼'</span>, <span class="hljs-string">'만의'</span>, <span class="hljs-string">'같이'</span>, <span class="hljs-string">'에게'</span>, <span class="hljs-string">'한테'</span>, <span class="hljs-string">'께서'</span>, <span class="hljs-string">'으로'</span>, <span class="hljs-string">'에서'</span>, <span class="hljs-string">'부터'</span>, <span class="hljs-string">'까지'</span>, <span class="hljs-string">'마저'</span>, <span class="hljs-string">'조차'</span>, <span class="hljs-string">'마다'</span>, <span class="hljs-string">'보다'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'만을'</span>, <span class="hljs-string">'만의'</span>, <span class="hljs-string">'만이'</span>, <span class="hljs-string">'에도'</span>, <span class="hljs-string">'이랑'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이자'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이든'</span>, <span class="hljs-string">'이야'</span>, <span class="hljs-string">'만도'</span>, <span class="hljs-string">'하고'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이랑'</span>, <span class="hljs-string">'이자'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이든'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'은'</span>, <span class="hljs-string">'는'</span>, <span class="hljs-string">'이'</span>, <span class="hljs-string">'가'</span>, <span class="hljs-string">'을'</span>, <span class="hljs-string">'를'</span>, <span class="hljs-string">'의'</span>, <span class="hljs-string">'에'</span>, <span class="hljs-string">'과'</span>, <span class="hljs-string">'와'</span>, <span class="hljs-string">'로'</span>, <span class="hljs-string">'며'</span>, <span class="hljs-string">'나'</span>, <span class="hljs-string">'랑'</span>, <span class="hljs-string">'도'</span>, <span class="hljs-string">'만'</span>, <span class="hljs-string">'야'</span>, <span class="hljs-string">'든'</span>, <span class="hljs-string">'고'</span>, <span class="hljs-string">'요'</span>, <span class="hljs-string">'라'</span>, <span class="hljs-string">'면'</span>, <span class="hljs-string">'씩'</span>, <span class="hljs-string">'뿐'</span>, <span class="hljs-string">'서'</span>, <span class="hljs-string">'께'</span>, <span class="hljs-string">'들'</span>, <span class="hljs-string">'쯤'</span>, <span class="hljs-string">'이란'</span>, <span class="hljs-string">'이라는'</span>, <span class="hljs-string">'이라고'</span>, <span class="hljs-string">'이라면'</span>, <span class="hljs-string">'이었던'</span>, <span class="hljs-string">'이었다'</span>, <span class="hljs-string">'이었고'</span>, <span class="hljs-string">'라는'</span>, <span class="hljs-string">'라고'</span>, <span class="hljs-string">'라면'</span>, <span class="hljs-string">'라도'</span>, <span class="hljs-string">'라서'</span>, <span class="hljs-string">'이지'</span>, <span class="hljs-string">'이네'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'이에요'</span>, <span class="hljs-string">'이예요'</span>, <span class="hljs-string">'이어요'</span>, <span class="hljs-string">'이잖아'</span>, <span class="hljs-string">'이구나'</span>, <span class="hljs-string">'이군요'</span>, <span class="hljs-string">'지만'</span>, <span class="hljs-string">'지요'</span>, <span class="hljs-string">'지는'</span>, <span class="hljs-string">'지도'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지가'</span>, <span class="hljs-string">'지의'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지와'</span>, <span class="hljs-string">'지는'</span> ]; <span class="hljs-comment">// 포괄적인 접속사 목록</span> $conjunctions = [ <span class="hljs-string">'그리고'</span>, <span class="hljs-string">'그러나'</span>, <span class="hljs-string">'하지만'</span>, <span class="hljs-string">'또한'</span>, <span class="hljs-string">'그런데'</span>, <span class="hljs-string">'따라서'</span>, <span class="hljs-string">'그래서'</span>, <span class="hljs-string">'그러므로'</span>, <span class="hljs-string">'그렇지만'</span>, <span class="hljs-string">'하물며'</span>, <span class="hljs-string">'뿐만아니라'</span>, <span class="hljs-string">'게다가'</span>, <span class="hljs-string">'아울러'</span>, <span class="hljs-string">'더구나'</span>, <span class="hljs-string">'더욱이'</span>, <span class="hljs-string">'하여튼'</span>, <span class="hljs-string">'어쨌든'</span>, <span class="hljs-string">'결국'</span>, <span class="hljs-string">'그리하여'</span>, <span class="hljs-string">'따지고보면'</span>, <span class="hljs-string">'요컨대'</span>, <span class="hljs-string">'즉'</span>, <span class="hljs-string">'곧'</span>, <span class="hljs-string">'다시말해'</span>, <span class="hljs-string">'예컨대'</span>, <span class="hljs-string">'왜냐하면'</span>, <span class="hljs-string">'때문에'</span> ]; <span class="hljs-comment">// 접미사 목록</span> $suffixes = [ <span class="hljs-string">'하다'</span>, <span class="hljs-string">'되다'</span>, <span class="hljs-string">'시키다'</span>, <span class="hljs-string">'당하다'</span>, <span class="hljs-string">'받다'</span>, <span class="hljs-string">'스럽다'</span>, <span class="hljs-string">'적'</span>, <span class="hljs-string">'화'</span>, <span class="hljs-string">'성'</span>, <span class="hljs-string">'형'</span>, <span class="hljs-string">'감'</span>, <span class="hljs-string">'력'</span>, <span class="hljs-string">'미'</span>, <span class="hljs-string">'법'</span>, <span class="hljs-string">'용'</span>, <span class="hljs-string">'론'</span>, <span class="hljs-string">'식'</span>, <span class="hljs-string">'술'</span>, <span class="hljs-string">'상'</span>, <span class="hljs-string">'증'</span>, <span class="hljs-string">'망'</span>, <span class="hljs-string">'계'</span>, <span class="hljs-string">'층'</span>, <span class="hljs-string">'권'</span>, <span class="hljs-string">'율'</span>, <span class="hljs-string">'가'</span>, <span class="hljs-string">'수'</span>, <span class="hljs-string">'원'</span>, <span class="hljs-string">'장'</span>, <span class="hljs-string">'사'</span>, <span class="hljs-string">'자'</span>, <span class="hljs-string">'님'</span>, <span class="hljs-string">'씨'</span>, <span class="hljs-string">'군'</span>, <span class="hljs-string">'양'</span>, <span class="hljs-string">'이다'</span>, <span class="hljs-string">'입니다'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'이야'</span>, <span class="hljs-string">'이었'</span>, <span class="hljs-string">'하는'</span>, <span class="hljs-string">'하다'</span>, <span class="hljs-string">'했던'</span>, <span class="hljs-string">'했다'</span>, <span class="hljs-string">'했는'</span>, <span class="hljs-string">'했고'</span>, <span class="hljs-string">'하고'</span>, <span class="hljs-string">'하여'</span>, <span class="hljs-string">'하면'</span>, <span class="hljs-string">'해서'</span>, <span class="hljs-string">'된다'</span>, <span class="hljs-string">'됩니다'</span>, <span class="hljs-string">'되고'</span>, <span class="hljs-string">'되는'</span>, <span class="hljs-string">'되며'</span>, <span class="hljs-string">'되어'</span>, <span class="hljs-string">'되었'</span>, <span class="hljs-string">'인'</span>, <span class="hljs-string">'인데'</span>, <span class="hljs-string">'인지'</span>, <span class="hljs-string">'일까'</span>, <span class="hljs-string">'일듯'</span> ]; $affixes = array_merge($particles, $conjunctions, $suffixes); usort($affixes, <span class="hljs-function"><span class="hljs-keyword">function</span><span class="hljs-params">($a, $b)</span> </span>{ <span class="hljs-keyword">return</span> mb_strlen($b, <span class="hljs-string">'UTF-8'</span>) - mb_strlen($a, <span class="hljs-string">'UTF-8'</span>); }); $originalWord = $word; $changed = <span class="hljs-keyword">true</span>; <span class="hljs-keyword">while</span> ($changed) { $changed = <span class="hljs-keyword">false</span>; <span class="hljs-keyword">foreach</span> ($affixes <span class="hljs-keyword">as</span> $affix) { $affixLength = mb_strlen($affix, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> (mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) &lt;= $affixLength) { <span class="hljs-keyword">continue</span>; } $ending = mb_substr($word, -$affixLength, <span class="hljs-keyword">null</span>, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> ($ending === $affix) { $stem = mb_substr($word, <span class="hljs-number">0</span>, mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) - $affixLength, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> (mb_strlen($stem, <span class="hljs-string">'UTF-8'</span>) &lt; <span class="hljs-number">2</span>) { <span class="hljs-keyword">continue</span>; } <span class="hljs-comment">// 특정 affix에 대해 음운 규칙 적용</span> <span class="hljs-keyword">if</span> (in_array($affix, [<span class="hljs-string">'이'</span>, <span class="hljs-string">'은'</span>, <span class="hljs-string">'을'</span>, <span class="hljs-string">'으로'</span>])) { $lastChar = mb_substr($stem, <span class="hljs-number">-1</span>, <span class="hljs-number">1</span>, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> (!hasFinalConsonant($lastChar)) { <span class="hljs-keyword">continue</span>; } } <span class="hljs-keyword">if</span> (in_array($affix, [<span class="hljs-string">'가'</span>, <span class="hljs-string">'는'</span>, <span class="hljs-string">'를'</span>, <span class="hljs-string">'로'</span>])) { $lastChar = mb_substr($stem, <span class="hljs-number">-1</span>, <span class="hljs-number">1</span>, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> (hasFinalConsonant($lastChar)) { <span class="hljs-keyword">continue</span>; } } $word = $stem; $changed = <span class="hljs-keyword">true</span>; <span class="hljs-keyword">break</span>; } } } <span class="hljs-keyword">return</span> ($word !== $originalWord &amp;&amp; mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) &gt;= <span class="hljs-number">2</span>) ? $word : $originalWord; } <span class="hljs-comment">/** * 초성 일치율 계산 함수 * - 두 단어의 초성만 추출하여 비교 */</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">calculateInitialSimilarity</span><span class="hljs-params">($stx, $candidate)</span> </span>{ <span class="hljs-comment">// 초성 추출</span> $stx_initials = extractInitials($stx); $candidate_initials = extractInitials($candidate); <span class="hljs-comment">// 초성 길이 확인</span> $stx_len = mb_strlen($stx_initials, <span class="hljs-string">'UTF-8'</span>); $candidate_len = mb_strlen($candidate_initials, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-comment">// 빈 문자열 처리</span> <span class="hljs-keyword">if</span> ($stx_len == <span class="hljs-number">0</span> || $candidate_len == <span class="hljs-number">0</span>) { <span class="hljs-keyword">return</span> <span class="hljs-number">0</span>; } <span class="hljs-comment">// 일치하는 초성 개수 세기</span> $matching_count = <span class="hljs-number">0</span>; $compare_len = min($stx_len, $candidate_len); <span class="hljs-keyword">for</span> ($i = <span class="hljs-number">0</span>; $i &lt; $compare_len; $i++) { $stx_char = mb_substr($stx_initials, $i, <span class="hljs-number">1</span>, <span class="hljs-string">'UTF-8'</span>); $candidate_char = mb_substr($candidate_initials, $i, <span class="hljs-number">1</span>, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> ($stx_char === $candidate_char) { $matching_count++; } } <span class="hljs-comment">// 일치율 계산 (검색어 초성 수 기준)</span> $similarity = $matching_count / $stx_len; <span class="hljs-keyword">return</span> $similarity; } <span class="hljs-comment">/** * 전체 텍스트 유사도 계산 (레벤슈타인 거리 기반) */</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">calculateTextSimilarity</span><span class="hljs-params">($str1, $str2)</span> </span>{ $str1_len = mb_strlen($str1, <span class="hljs-string">'UTF-8'</span>); $str2_len = mb_strlen($str2, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-comment">// 빈 문자열 처리</span> <span class="hljs-keyword">if</span> ($str1_len == <span class="hljs-number">0</span>) <span class="hljs-keyword">return</span> <span class="hljs-number">0</span>; <span class="hljs-keyword">if</span> ($str2_len == <span class="hljs-number">0</span>) <span class="hljs-keyword">return</span> <span class="hljs-number">0</span>; <span class="hljs-comment">// 문자열을 배열로 변환</span> $str1_chars = preg_split(<span class="hljs-string">'//u'</span>, $str1, <span class="hljs-number">-1</span>, PREG_SPLIT_NO_EMPTY); $str2_chars = preg_split(<span class="hljs-string">'//u'</span>, $str2, <span class="hljs-number">-1</span>, PREG_SPLIT_NO_EMPTY); <span class="hljs-comment">// 거리 행렬 초기화</span> $distance = []; <span class="hljs-keyword">for</span> ($i = <span class="hljs-number">0</span>; $i &lt;= $str1_len; $i++) { $distance[$i][<span class="hljs-number">0</span>] = $i; } <span class="hljs-keyword">for</span> ($j = <span class="hljs-number">0</span>; $j &lt;= $str2_len; $j++) { $distance[<span class="hljs-number">0</span>][$j] = $j; } <span class="hljs-comment">// 레벤슈타인 거리 계산</span> <span class="hljs-keyword">for</span> ($i = <span class="hljs-number">1</span>; $i &lt;= $str1_len; $i++) { <span class="hljs-keyword">for</span> ($j = <span class="hljs-number">1</span>; $j &lt;= $str2_len; $j++) { $cost = ($str1_chars[$i<span class="hljs-number">-1</span>] === $str2_chars[$j<span class="hljs-number">-1</span>]) ? <span class="hljs-number">0</span> : <span class="hljs-number">1</span>; $distance[$i][$j] = min( $distance[$i<span class="hljs-number">-1</span>][$j] + <span class="hljs-number">1</span>, <span class="hljs-comment">// 삭제</span> $distance[$i][$j<span class="hljs-number">-1</span>] + <span class="hljs-number">1</span>, <span class="hljs-comment">// 삽입</span> $distance[$i<span class="hljs-number">-1</span>][$j<span class="hljs-number">-1</span>] + $cost <span class="hljs-comment">// 교체</span> ); } } <span class="hljs-comment">// 유사도 계산 (0~1 사이 값)</span> $max_distance = max($str1_len, $str2_len); $similarity = <span class="hljs-number">1</span> - ($distance[$str1_len][$str2_len] / $max_distance); <span class="hljs-keyword">return</span> $similarity; } <span class="hljs-comment">// 후보 단어 가져오기</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">getCandidates</span><span class="hljs-params">()</span> </span>{ <span class="hljs-keyword">global</span> $stx; $candidates = []; <span class="hljs-comment">// 인기 검색어 가져오기</span> $sql = <span class="hljs-string">"SELECT pp_word FROM g5_popular ORDER BY pp_date DESC, pp_count DESC LIMIT 100"</span>; $result = sql_query($sql); <span class="hljs-keyword">while</span> ($row = sql_fetch_array($result)) { $candidates[] = $row[<span class="hljs-string">'pp_word'</span>]; } <span class="hljs-comment">// 게시판에서 검색어 가져오기</span> $boards = [<span class="hljs-string">'g5_write_code'</span>, <span class="hljs-string">'g5_write_free'</span>, <span class="hljs-string">'g5_write_gallery'</span>, <span class="hljs-string">'g5_write_it'</span>, <span class="hljs-string">'g5_write_notice'</span>, <span class="hljs-string">'g5_write_blog'</span>]; <span class="hljs-keyword">foreach</span> ($boards <span class="hljs-keyword">as</span> $board) { <span class="hljs-keyword">try</span> { $safe_stx = sql_escape_string($stx); $sql = <span class="hljs-string">"SELECT wr_subject, wr_content FROM $board LIMIT 300"</span>; $result = sql_query($sql); <span class="hljs-keyword">while</span> ($row = sql_fetch_array($result)) { <span class="hljs-comment">// 제목에서 단어 추출</span> preg_match_all(<span class="hljs-string">'/[\pL\pN]+/u'</span>, $row[<span class="hljs-string">'wr_subject'</span>], $matches); <span class="hljs-keyword">foreach</span> ($matches[<span class="hljs-number">0</span>] <span class="hljs-keyword">as</span> $word) { <span class="hljs-keyword">if</span> (mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) &gt;= <span class="hljs-number">2</span>) { $candidates[] = $word; } } <span class="hljs-comment">// 내용에서도 단어 추출</span> preg_match_all(<span class="hljs-string">'/[\pL\pN]+/u'</span>, strip_tags($row[<span class="hljs-string">'wr_content'</span>]), $matches); <span class="hljs-keyword">foreach</span> ($matches[<span class="hljs-number">0</span>] <span class="hljs-keyword">as</span> $word) { <span class="hljs-keyword">if</span> (mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) &gt;= <span class="hljs-number">2</span>) { $candidates[] = $word; } } } } <span class="hljs-keyword">catch</span> (<span class="hljs-keyword">Exception</span> $e) { <span class="hljs-keyword">continue</span>; } } <span class="hljs-keyword">return</span> array_unique($candidates); } <span class="hljs-comment">// 조사 제거 및 필터링된 후보 단어 가져오기</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">getCleanCandidates</span><span class="hljs-params">($candidates)</span> </span>{ $clean_candidates = []; <span class="hljs-keyword">foreach</span> ($candidates <span class="hljs-keyword">as</span> $term) { <span class="hljs-comment">// 조사가 포함된 단어는 추가하지 않음</span> <span class="hljs-keyword">if</span> (hasParticleOrSuffix($term)) { <span class="hljs-keyword">continue</span>; } $clean_term = removeKoreanParticles($term); <span class="hljs-comment">// 조사를 제거한 후에도 여전히 조사가 붙어있으면 제외</span> <span class="hljs-keyword">if</span> (hasParticleOrSuffix($clean_term)) { <span class="hljs-keyword">continue</span>; } <span class="hljs-comment">// 너무 짧은 단어 제외 (2글자 이상만 포함)</span> <span class="hljs-keyword">if</span> (mb_strlen($clean_term, <span class="hljs-string">'UTF-8'</span>) &gt;= <span class="hljs-number">2</span>) { $clean_candidates[$clean_term] = <span class="hljs-keyword">true</span>; <span class="hljs-comment">// 중복 제거를 위해 키로 사용</span> } } <span class="hljs-keyword">return</span> array_keys($clean_candidates); } <span class="hljs-comment">// 연관 검색어 가져오기 (search_suggest.php 호출)</span> $related_terms = []; $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, G5_BBS_URL . <span class="hljs-string">'/search_suggest.php?stx='</span> . urlencode($stx)); curl_setopt($ch, CURLOPT_RETURNTRANSFER, <span class="hljs-keyword">true</span>); $response = curl_exec($ch); curl_close($ch); <span class="hljs-keyword">if</span> ($response) { $related_terms = json_decode($response, <span class="hljs-keyword">true</span>) ?: []; } <span class="hljs-comment">// 검색어에서 조사 제거</span> $clean_stx = removeKoreanParticles($stx); <span class="hljs-comment">// 모든 후보 단어 가져오기</span> $all_candidates = array_merge($related_terms, getCandidates()); $clean_candidates = getCleanCandidates($all_candidates); <span class="hljs-comment">// stx와 정확히 일치하는 단어가 있는지 확인</span> $exact_match_exists = <span class="hljs-keyword">false</span>; <span class="hljs-keyword">foreach</span> ($clean_candidates <span class="hljs-keyword">as</span> $term) { <span class="hljs-keyword">if</span> (strtolower($term) === strtolower($clean_stx)) { $exact_match_exists = <span class="hljs-keyword">true</span>; <span class="hljs-keyword">break</span>; } } <span class="hljs-comment">// 결과 변수 초기화</span> $correction = <span class="hljs-string">''</span>; $related = []; <span class="hljs-comment">// 한글이 포함된 경우에만 처리</span> <span class="hljs-keyword">if</span> (preg_match(<span class="hljs-string">'/[\x{AC00}-\x{D7A3}]/u'</span>, $stx)) { $stx_initials = extractInitials($clean_stx); <span class="hljs-comment">// 초성이 일치하는 단어들 찾기</span> $initial_matches = []; <span class="hljs-keyword">foreach</span> ($clean_candidates <span class="hljs-keyword">as</span> $term) { <span class="hljs-comment">// 자기 자신은 건너뛰기</span> <span class="hljs-keyword">if</span> (strtolower($term) === strtolower($clean_stx)) { <span class="hljs-keyword">continue</span>; } <span class="hljs-comment">// 초성 유사도 계산</span> $similarity = calculateInitialSimilarity($clean_stx, $term); <span class="hljs-keyword">if</span> ($similarity &gt;= <span class="hljs-number">0.75</span>) { <span class="hljs-comment">// 75% 이상 일치</span> $initial_matches[$term] = $similarity; } } <span class="hljs-comment">// 정확히 일치하는 단어가 없는 경우: 맞춤법 교정 제안</span> <span class="hljs-keyword">if</span> (!$exact_match_exists &amp;&amp; !<span class="hljs-keyword">empty</span>($initial_matches)) { <span class="hljs-comment">// 유사도 기준으로 정렬</span> arsort($initial_matches); $correction = key($initial_matches); <span class="hljs-comment">// 가장 유사도가 높은 첫 번째 단어</span> } <span class="hljs-comment">// 정확히 일치하는 단어가 있는 경우: 연관 검색어 제안</span> <span class="hljs-keyword">if</span> ($exact_match_exists &amp;&amp; !<span class="hljs-keyword">empty</span>($initial_matches)) { <span class="hljs-comment">// 초성 기반 연관 검색어</span> $related = array_keys($initial_matches); } <span class="hljs-comment">// 정확히 일치하는 단어도 없고 초성 일치하는 단어도 없는 경우: 텍스트 유사도 기반 추천</span> <span class="hljs-keyword">if</span> (!$exact_match_exists &amp;&amp; <span class="hljs-keyword">empty</span>($initial_matches)) { $text_matches = []; <span class="hljs-keyword">foreach</span> ($clean_candidates <span class="hljs-keyword">as</span> $term) { $similarity = calculateTextSimilarity($clean_stx, $term); <span class="hljs-keyword">if</span> ($similarity &gt;= <span class="hljs-number">0.85</span>) { <span class="hljs-comment">// 85% 이상 일치</span> $text_matches[$term] = $similarity; } } <span class="hljs-comment">// 유사도 기준으로 정렬</span> arsort($text_matches); <span class="hljs-comment">// 연관 검색어 설정</span> $related = array_keys($text_matches); } } <span class="hljs-comment">// 맞춤법 교정이 있고 조사가 붙어있으면 제거</span> <span class="hljs-keyword">if</span> (!<span class="hljs-keyword">empty</span>($correction) &amp;&amp; hasParticleOrSuffix($correction)) { $correction = removeKoreanParticles($correction); <span class="hljs-comment">// 여전히 조사가 있으면 무시</span> <span class="hljs-keyword">if</span> (hasParticleOrSuffix($correction)) { $correction = <span class="hljs-string">''</span>; } } <span class="hljs-comment">// 연관 검색어 필터링 (조사가 있거나 검색어와 동일한 경우 제외)</span> $filtered_related = []; <span class="hljs-keyword">foreach</span> ($related <span class="hljs-keyword">as</span> $term) { <span class="hljs-keyword">if</span> (strtolower($term) !== strtolower($clean_stx) &amp;&amp; !hasParticleOrSuffix($term)) { $filtered_related[] = $term; } } <span class="hljs-comment">// 결과 반환</span> <span class="hljs-keyword">echo</span> json_encode([ <span class="hljs-string">'correction'</span> =&gt; $correction, <span class="hljs-string">'related'</span> =&gt; array_slice($filtered_related, <span class="hljs-number">0</span>, <span class="hljs-number">5</span>) <span class="hljs-comment">// 최대 5개 연관 검색어 반환</span> ]); <span class="hljs-meta">?&gt;</span></span></code><code class="hljs" contenteditable="true"><br></code></pre></div><p><br></p><p><br></p><div class="t2-code-block"><pre><code class="hljs xml" contenteditable="true"><span class="php"><span class="hljs-meta">&lt;?php</span> <span class="hljs-keyword">include_once</span>(<span class="hljs-string">'./_common.php'</span>); <span class="hljs-comment">// stx 파라미터 값 처리</span> $stx = <span class="hljs-keyword">isset</span>($_GET[<span class="hljs-string">'stx'</span>]) ? trim($_GET[<span class="hljs-string">'stx'</span>]) : <span class="hljs-string">''</span>; header(<span class="hljs-string">'Content-Type: application/json'</span>); <span class="hljs-keyword">if</span> (<span class="hljs-keyword">empty</span>($stx) || mb_strlen($stx, <span class="hljs-string">'UTF-8'</span>) &lt; <span class="hljs-number">2</span>) { <span class="hljs-keyword">echo</span> json_encode([<span class="hljs-string">'correction'</span> =&gt; <span class="hljs-string">''</span>, <span class="hljs-string">'related'</span> =&gt; []]); <span class="hljs-keyword">exit</span>; } <span class="hljs-comment">// 한글 문자를 위한 유니코드 변환 함수 (캐싱 불필요, 단순 계산 함수로 유지)</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">uniord</span><span class="hljs-params">($c)</span> </span>{ $h = ord($c[<span class="hljs-number">0</span>]); <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0x7F</span>) <span class="hljs-keyword">return</span> $h; <span class="hljs-keyword">if</span> ($h &lt; <span class="hljs-number">0xC2</span>) <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>; <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0xDF</span>) <span class="hljs-keyword">return</span> (($h &amp; <span class="hljs-number">0x1F</span>) &lt;&lt; <span class="hljs-number">6</span>) | (ord($c[<span class="hljs-number">1</span>]) &amp; <span class="hljs-number">0x3F</span>); <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0xEF</span>) <span class="hljs-keyword">return</span> (($h &amp; <span class="hljs-number">0x0F</span>) &lt;&lt; <span class="hljs-number">12</span>) | ((ord($c[<span class="hljs-number">1</span>]) &amp; <span class="hljs-number">0x3F</span>) &lt;&lt; <span class="hljs-number">6</span>) | (ord($c[<span class="hljs-number">2</span>]) &amp; <span class="hljs-number">0x3F</span>); <span class="hljs-keyword">if</span> ($h &lt;= <span class="hljs-number">0xF4</span>) <span class="hljs-keyword">return</span> (($h &amp; <span class="hljs-number">0x07</span>) &lt;&lt; <span class="hljs-number">18</span>) | ((ord($c[<span class="hljs-number">1</span>]) &amp; <span class="hljs-number">0x3F</span>) &lt;&lt; <span class="hljs-number">12</span>) | ((ord($c[<span class="hljs-number">2</span>]) &amp; <span class="hljs-number">0x3F</span>) &lt;&lt; <span class="hljs-number">6</span>) | (ord($c[<span class="hljs-number">3</span>]) &amp; <span class="hljs-number">0x3F</span>); <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>; } <span class="hljs-comment">// 초성 추출 함수 (불필요한 변수 최소화)</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">extractInitials</span><span class="hljs-params">($text)</span> </span>{ <span class="hljs-keyword">static</span> $choseong = [<span class="hljs-string">'ㄱ'</span>, <span class="hljs-string">'ㄲ'</span>, <span class="hljs-string">'ㄴ'</span>, <span class="hljs-string">'ㄷ'</span>, <span class="hljs-string">'ㄸ'</span>, <span class="hljs-string">'ㄹ'</span>, <span class="hljs-string">'ㅁ'</span>, <span class="hljs-string">'ㅂ'</span>, <span class="hljs-string">'ㅃ'</span>, <span class="hljs-string">'ㅅ'</span>, <span class="hljs-string">'ㅆ'</span>, <span class="hljs-string">'ㅇ'</span>, <span class="hljs-string">'ㅈ'</span>, <span class="hljs-string">'ㅉ'</span>, <span class="hljs-string">'ㅊ'</span>, <span class="hljs-string">'ㅋ'</span>, <span class="hljs-string">'ㅌ'</span>, <span class="hljs-string">'ㅍ'</span>, <span class="hljs-string">'ㅎ'</span>]; $initials = <span class="hljs-string">''</span>; $text_len = mb_strlen($text, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">for</span> ($i = <span class="hljs-number">0</span>; $i &lt; $text_len; $i++) { $char = mb_substr($text, $i, <span class="hljs-number">1</span>, <span class="hljs-string">'UTF-8'</span>); $code = uniord($char); <span class="hljs-keyword">if</span> ($code &gt;= <span class="hljs-number">0xAC00</span> &amp;&amp; $code &lt;= <span class="hljs-number">0xD7A3</span>) { $initials .= $choseong[intval(($code - <span class="hljs-number">0xAC00</span>) / <span class="hljs-number">588</span>)]; <span class="hljs-comment">// 588 = 28 * 21</span> } <span class="hljs-keyword">else</span> { $initials .= $char; } } <span class="hljs-keyword">return</span> $initials; } <span class="hljs-comment">// 받침(종성) 검사 함수 (간소화)</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">hasFinalConsonant</span><span class="hljs-params">($char)</span> </span>{ $code = uniord($char); <span class="hljs-keyword">return</span> ($code &gt;= <span class="hljs-number">0xAC00</span> &amp;&amp; $code &lt;= <span class="hljs-number">0xD7A3</span>) ? (($code - <span class="hljs-number">0xAC00</span>) % <span class="hljs-number">28</span>) !== <span class="hljs-number">0</span> : <span class="hljs-keyword">false</span>; } <span class="hljs-comment">// 조사/접미사 검사 함수 (정적 배열로 최적화)</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">hasParticleOrSuffix</span><span class="hljs-params">($word)</span> </span>{ <span class="hljs-keyword">static</span> $patterns = [ <span class="hljs-string">'이란'</span>, <span class="hljs-string">'이라는'</span>, <span class="hljs-string">'이라고'</span>, <span class="hljs-string">'이라면'</span>, <span class="hljs-string">'이었던'</span>, <span class="hljs-string">'이었다'</span>, <span class="hljs-string">'이었고'</span>, <span class="hljs-string">'으로'</span>, <span class="hljs-string">'으로써'</span>, <span class="hljs-string">'으로서'</span>, <span class="hljs-string">'으로는'</span>, <span class="hljs-string">'으로도'</span>, <span class="hljs-string">'에서'</span>, <span class="hljs-string">'에서는'</span>, <span class="hljs-string">'에서도'</span>, <span class="hljs-string">'에서만'</span>, <span class="hljs-string">'에서의'</span>, <span class="hljs-string">'라는'</span>, <span class="hljs-string">'라고'</span>, <span class="hljs-string">'라면'</span>, <span class="hljs-string">'라도'</span>, <span class="hljs-string">'라서'</span>, <span class="hljs-string">'에게'</span>, <span class="hljs-string">'에게는'</span>, <span class="hljs-string">'에게서'</span>, <span class="hljs-string">'에게도'</span>, <span class="hljs-string">'과의'</span>, <span class="hljs-string">'과는'</span>, <span class="hljs-string">'과도'</span>, <span class="hljs-string">'과가'</span>, <span class="hljs-string">'와의'</span>, <span class="hljs-string">'와는'</span>, <span class="hljs-string">'와도'</span>, <span class="hljs-string">'와가'</span>, <span class="hljs-string">'보다'</span>, <span class="hljs-string">'보다는'</span>, <span class="hljs-string">'보다도'</span>, <span class="hljs-string">'부터'</span>, <span class="hljs-string">'까지'</span>, <span class="hljs-string">'만큼'</span>, <span class="hljs-string">'처럼'</span>, <span class="hljs-string">'같이'</span>, <span class="hljs-string">'이다'</span>, <span class="hljs-string">'입니다'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'이야'</span>, <span class="hljs-string">'이었'</span>, <span class="hljs-string">'하는'</span>, <span class="hljs-string">'하다'</span>, <span class="hljs-string">'했던'</span>, <span class="hljs-string">'했다'</span>, <span class="hljs-string">'했는'</span>, <span class="hljs-string">'했고'</span>, <span class="hljs-string">'하고'</span>, <span class="hljs-string">'하여'</span>, <span class="hljs-string">'하면'</span>, <span class="hljs-string">'해서'</span>, <span class="hljs-string">'된다'</span>, <span class="hljs-string">'됩니다'</span>, <span class="hljs-string">'되고'</span>, <span class="hljs-string">'되는'</span>, <span class="hljs-string">'되며'</span>, <span class="hljs-string">'되어'</span>, <span class="hljs-string">'되었'</span>, <span class="hljs-string">'인'</span>, <span class="hljs-string">'인데'</span>, <span class="hljs-string">'인지'</span>, <span class="hljs-string">'일까'</span>, <span class="hljs-string">'일듯'</span>, <span class="hljs-string">'이지'</span>, <span class="hljs-string">'이네'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'이에요'</span>, <span class="hljs-string">'이예요'</span>, <span class="hljs-string">'이어요'</span>, <span class="hljs-string">'이잖아'</span>, <span class="hljs-string">'이구나'</span>, <span class="hljs-string">'이군요'</span>, <span class="hljs-string">'지만'</span>, <span class="hljs-string">'지요'</span>, <span class="hljs-string">'지는'</span>, <span class="hljs-string">'지도'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지가'</span>, <span class="hljs-string">'지의'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지와'</span>, <span class="hljs-string">'지는'</span> ]; $word_len = mb_strlen($word, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">foreach</span> ($patterns <span class="hljs-keyword">as</span> $pattern) { $p_len = mb_strlen($pattern, <span class="hljs-string">'UTF-8'</span>); <span class="hljs-keyword">if</span> ($word_len &gt; $p_len &amp;&amp; mb_substr($word, -$p_len) === $pattern) { <span class="hljs-keyword">return</span> mb_strlen(mb_substr($word, <span class="hljs-number">0</span>, $word_len - $p_len), <span class="hljs-string">'UTF-8'</span>) &gt;= <span class="hljs-number">2</span>; } } <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>; } <span class="hljs-comment">// 조사 및 접속사 제거 함수 (정적 배열 및 최소화된 루프)</span> <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">removeKoreanParticles</span><span class="hljs-params">($word)</span> </span>{ <span class="hljs-keyword">if</span> (mb_strlen($word, <span class="hljs-string">'UTF-8'</span>) &lt;= <span class="hljs-number">2</span>) <span class="hljs-keyword">return</span> $word; <span class="hljs-keyword">static</span> $affixes = <span class="hljs-keyword">null</span>; <span class="hljs-keyword">if</span> ($affixes === <span class="hljs-keyword">null</span>) { $affixes = array_merge( [<span class="hljs-string">'에게서'</span>, <span class="hljs-string">'한테서'</span>, <span class="hljs-string">'으로서'</span>, <span class="hljs-string">'으로써'</span>, <span class="hljs-string">'로써'</span>, <span class="hljs-string">'로서'</span>, <span class="hljs-string">'에서부터'</span>, <span class="hljs-string">'부터는'</span>, <span class="hljs-string">'까지는'</span>, <span class="hljs-string">'마저도'</span>, <span class="hljs-string">'조차도'</span>, <span class="hljs-string">'에게도'</span>, <span class="hljs-string">'한테도'</span>, <span class="hljs-string">'께서도'</span>, <span class="hljs-string">'에서는'</span>, <span class="hljs-string">'로는'</span>, <span class="hljs-string">'로써는'</span>, <span class="hljs-string">'만으로도'</span>, <span class="hljs-string">'만으로는'</span>, <span class="hljs-string">'에게는'</span>, <span class="hljs-string">'한테는'</span>, <span class="hljs-string">'께서는'</span>, <span class="hljs-string">'으로도'</span>, <span class="hljs-string">'로도'</span>, <span class="hljs-string">'과는'</span>, <span class="hljs-string">'와는'</span>, <span class="hljs-string">'만큼은'</span>, <span class="hljs-string">'처럼은'</span>, <span class="hljs-string">'보다는'</span>, <span class="hljs-string">'같이는'</span>, <span class="hljs-string">'마냥'</span>, <span class="hljs-string">'만은'</span>, <span class="hljs-string">'만이'</span>, <span class="hljs-string">'뿐만'</span>, <span class="hljs-string">'뿐이'</span>, <span class="hljs-string">'밖에'</span>, <span class="hljs-string">'같은'</span>, <span class="hljs-string">'처럼'</span>, <span class="hljs-string">'만큼'</span>, <span class="hljs-string">'만의'</span>, <span class="hljs-string">'같이'</span>, <span class="hljs-string">'에게'</span>, <span class="hljs-string">'한테'</span>, <span class="hljs-string">'께서'</span>, <span class="hljs-string">'으로'</span>, <span class="hljs-string">'에서'</span>, <span class="hljs-string">'부터'</span>, <span class="hljs-string">'까지'</span>, <span class="hljs-string">'마저'</span>, <span class="hljs-string">'조차'</span>, <span class="hljs-string">'마다'</span>, <span class="hljs-string">'보다'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'만을'</span>, <span class="hljs-string">'만의'</span>, <span class="hljs-string">'만이'</span>, <span class="hljs-string">'에도'</span>, <span class="hljs-string">'이랑'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이자'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이든'</span>, <span class="hljs-string">'이야'</span>, <span class="hljs-string">'만도'</span>, <span class="hljs-string">'하고'</span>, <span class="hljs-string">'이며'</span>, <span class="hljs-string">'이랑'</span>, <span class="hljs-string">'이자'</span>, <span class="hljs-string">'이나'</span>, <span class="hljs-string">'이고'</span>, <span class="hljs-string">'이든'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'은'</span>, <span class="hljs-string">'는'</span>, <span class="hljs-string">'이'</span>, <span class="hljs-string">'가'</span>, <span class="hljs-string">'을'</span>, <span class="hljs-string">'를'</span>, <span class="hljs-string">'의'</span>, <span class="hljs-string">'에'</span>, <span class="hljs-string">'과'</span>, <span class="hljs-string">'와'</span>, <span class="hljs-string">'로'</span>, <span class="hljs-string">'며'</span>, <span class="hljs-string">'나'</span>, <span class="hljs-string">'랑'</span>, <span class="hljs-string">'도'</span>, <span class="hljs-string">'만'</span>, <span class="hljs-string">'야'</span>, <span class="hljs-string">'든'</span>, <span class="hljs-string">'고'</span>, <span class="hljs-string">'요'</span>, <span class="hljs-string">'라'</span>, <span class="hljs-string">'면'</span>, <span class="hljs-string">'씩'</span>, <span class="hljs-string">'뿐'</span>, <span class="hljs-string">'서'</span>, <span class="hljs-string">'께'</span>, <span class="hljs-string">'들'</span>, <span class="hljs-string">'쯤'</span>, <span class="hljs-string">'이란'</span>, <span class="hljs-string">'이라는'</span>, <span class="hljs-string">'이라고'</span>, <span class="hljs-string">'이라면'</span>, <span class="hljs-string">'이었던'</span>, <span class="hljs-string">'이었다'</span>, <span class="hljs-string">'이었고'</span>, <span class="hljs-string">'라는'</span>, <span class="hljs-string">'라고'</span>, <span class="hljs-string">'라면'</span>, <span class="hljs-string">'라도'</span>, <span class="hljs-string">'라서'</span>, <span class="hljs-string">'이지'</span>, <span class="hljs-string">'이네'</span>, <span class="hljs-string">'이요'</span>, <span class="hljs-string">'이에요'</span>, <span class="hljs-string">'이예요'</span>, <span class="hljs-string">'이어요'</span>, <span class="hljs-string">'이잖아'</span>, <span class="hljs-string">'이구나'</span>, <span class="hljs-string">'이군요'</span>, <span class="hljs-string">'지만'</span>, <span class="hljs-string">'지요'</span>, <span class="hljs-string">'지는'</span>, <span class="hljs-string">'지도'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지가'</span>, <span class="hljs-string">'지의'</span>, <span class="hljs-string">'지를'</span>, <span class="hljs-string">'지와'</span>, <span class="hljs-string">'지는'</span>], [<span class="hljs-string">'그리고'</span>, <span class="hljs-string">'그러나'</span>, <span class="hljs-string">'하지만'</span>, <span class="hljs-string">'또한'</span>, <span class="hljs-string">'그런데'</span>, <span class="hljs-string">'따라서'</span>, <span class="hljs-string">'그래서'</span>, <span class="hljs-string">'그러므로'</span>, <span class="hljs-string">'그렇지만'</span>, <span class="hljs-string">'하물며'</span>, <span class="hljs-string">'뿐만아니라'</span>, <span class="hljs-string">'게다가'</span>, <span class="hljs-string">'아울러'</span>, <span class="hljs-string">'더구나'</span>, <span class="hljs-string">'더욱이'</span>, <span class="hljs-string">'하여튼'</span>, <span class="hljs-string">'어쨌든'</span>, <span class="hljs-string">'결국'</span>, <span class="hljs-string">'그리하여'</span>, <span class="hljs-string">'따지고보면'</span>, <span class="hljs-string">'요컨대'</span>, <span class="hljs-string">'즉'</span>, <span class="hljs-string">'곧'</span>, <span class="hljs-string">'다시말해'</span>, <span class="hljs-string">'예컨대'</span>, <span class="hljs-string">'왜냐하면'</span>, <span class="hljs-string">'때문에'</span>], [<span class="hljs-string">'하다'</span>, <span class="hljs-string">'되다'</span>, <span class="hljs-string">'시키다'</span>, <span class="hljs-string">'당하다'</span>, <span class="hljs-string">'받다'</span>, <span class="hljs-string">'스럽다'</span>, <span class="hljs-string">'적'</span>, <span class="hljs-string">'화'</span>, <span class="hljs-string">'성'</span>, <span class="hljs-string">'형'</span>, <span class="hljs-string">'감'</span>, <span class="hljs-string">'력'</span>, <span class="hljs-string">'미'</span>, <span class="hljs-string">'법'</span>, <span class="hljs-string">'용'</span>, <span class="hljs-string">'론'</span>, <span class="hljs-string">'식'</span>, <span class="hljs-string">'술'</span>, <span class="hljs-string">'상'</span>, <span class="hljs-string">'증'</span>, <span class="hljs-string">'망'</span>, <span class="hljs-string">'계'</span>, <span class="hljs-string">'층'</span>, <span class=