00001 <?php
/**
 * Inverted-index helper: stores, per (table, field, primary key), the words of
 * a text field in ISR_INDEX (exact words) and ISR_INDEXFONO (phonetic form),
 * and turns a search phrase with E / OU operators into a query over them.
 *
 * Depends on the project classes MPhonetic and sql, and on a connection
 * object exposing getQuery(), getQueryCommand(), Execute() and ->db.
 */
class MISR
{
    /** Connection object used for every query (project type). */
    public $conn;

    /** MPhonetic instance used to normalise and fonetize words. */
    public $phonetic;

    /** List of upper-case words that are never indexed; filled by setStopWords(). */
    public $stopWords;

    /**
     * @param object $conn connection object used for all database access
     */
    public function __construct($conn)
    {
        $this->conn = $conn;
        $this->phonetic = new MPhonetic();
    }

    /**
     * (Re)loads the Portuguese stop-word list into $this->stopWords.
     *
     * The list is kept in its original order (repeated entries included) because
     * the property is public; only membership is used by this class.
     */
    private function setStopWords()
    {
        $this->stopWords = array
        (
            'QUE', 'PARA', 'COM', 'NÃO', 'UMA', 'POR', 'MAIS', 'DOS', 'COMO', 'MAS',
            'FOI', 'ELE', 'DAS', 'TEM', 'SEU', 'SUA', 'SER', 'QUANDO', 'MUITO', 'NOS',
            'ESTÁ', 'TAMBÉM', 'PELO', 'PELA', 'ATÉ', 'ISSO', 'ELA', 'ENTRE', 'ERA', 'DEPOIS',
            'SEM', 'MESMO', 'AOS', 'SEUS', 'QUEM', 'NAS', 'ESSE', 'ESTÃO', 'VOCÊ', 'ESSA',
            'NUM', 'NEM', 'SUAS', 'MEU', 'MINHA', 'NUMA', 'PELOS', 'ELAS', 'HAVIA', 'SEJA',
            'QUAL', 'NÓS', 'LHE', 'DELES', 'ESSAS', 'ESSES', 'PELAS', 'ESTE', 'DELE', 'VOCÊS',
            'VOS', 'LHES', 'MEUS', 'MINHAS', 'TEU', 'TUA', 'TEUS', 'TUAS', 'NOSSO', 'NOSSA',
            'NOSSOS', 'NOSSAS', 'DELA', 'DELAS', 'ESTA', 'ESTES', 'ESTAS', 'AQUELE', 'AQUELA', 'AQUELES',
            'AQUELAS', 'ISTO', 'AQUILO', 'ESTOU', 'ESTÁ', 'ESTAMOS', 'ESTÃO', 'ESTIVE', 'ESTEVE', 'ESTIVEMOS',
            'ESTIVERAM', 'ESTAVA', 'ESTÁVAMOS', 'ESTAVAM', 'ESTIVERA', 'ESTIVÉRAMOS', 'ESTEJA', 'ESTEJAMOS', 'ESTEJAM', 'ESTIVESSE',
            'ESTIVÉSSEMOS', 'ESTIVESSEM', 'ESTIVER', 'ESTIVERMOS', 'ESTIVEREM', 'HEI', 'HAVEMOS', 'HÃO', 'HOUVE', 'HOUVEMOS',
            'HOUVERAM', 'HOUVERA', 'HOUVÉRAMOS', 'HAJA', 'HAJAMOS', 'HAJAM', 'HOUVESSE', 'HOUVÉSSEMOS', 'HOUVESSEM', 'HOUVER',
            'HOUVERMOS', 'HOUVEREM', 'HOUVEREI', 'HOUVERÁ', 'HOUVEREMOS', 'HOUVERÃO', 'HOUVERIA', 'HOUVERÍAMOS', 'HOUVERIAM', 'SOU',
            'SOMOS', 'SÃO', 'ERA', 'ÉRAMOS', 'ERAM', 'FUI', 'FOI', 'FOMOS', 'FORAM', 'FORA',
            'FÔRAMOS', 'SEJA', 'SEJAMOS', 'SEJAM', 'FOSSE', 'FÔSSEMOS', 'FOSSEM', 'FOR', 'FORMOS', 'FOREM',
            'SEREI', 'SERÁ', 'SEREMOS', 'SERÃO', 'SERIA', 'SERÍAMOS', 'SERIAM', 'TENHO', 'TEM', 'TEMOS',
            'TÉM', 'TINHA', 'TÍNHAMOS', 'TINHAM', 'TIVE', 'TEVE', 'TIVEMOS', 'TIVERAM', 'TIVERA', 'TIVÉRAMOS',
            'TENHA', 'TENHAMOS', 'TENHAM', 'TIVESSE', 'TIVÉSSEMOS', 'TIVESSEM', 'TIVER', 'TIVERMOS', 'TIVEREM', 'TEREI',
            'TERÁ', 'TEREMOS', 'TERÃO', 'TERIA', 'TERÍAMOS', 'TERIAM',
            // Correct spelling of the verb form listed above as 'TÉM'; the
            // misspelt entry is kept so existing behaviour does not change.
            'TÊM'
        );
    }

    /**
     * Tells whether a word should be indexed.
     *
     * @param string $word upper-cased candidate word
     * @return bool true when the word is longer than 2 bytes and is not a stop word
     */
    private function isValid($word)
    {
        return (strlen($word) > 2) && !in_array($word, $this->stopWords, true);
    }

    /**
     * Splits a string on whitespace and appends every token longer than
     * 2 bytes to $array.
     *
     * @param string $string text to split
     * @param array  $array  receives the tokens; created when not yet an array
     */
    private function getTokens($string, &$array)
    {
        if (!is_array($array))
        {
            $array = array();
        }

        $delimiters = " \t\r\n";
        $tok = strtok((string) $string, $delimiters);

        // Compare against false explicitly: a token such as "0" is falsy and
        // would otherwise end the loop before the rest of the text is read.
        while ($tok !== false)
        {
            $tok = trim($tok);

            if (strlen($tok) > 2)
            {
                $array[] = $tok;
            }

            $tok = strtok($delimiters);
        }
    }

    /**
     * Returns the id registered in ISR_FIELD for a table/field pair, creating
     * the row (id taken from sequence seq_isr_field) when it does not exist.
     *
     * @param string $tableName table name; upper-cased before use
     * @param string $fieldName field name; upper-cased before use
     * @return mixed the field id
     */
    public function getIdField($tableName, $fieldName)
    {
        $table = strtoupper($tableName);
        $field = strtoupper($fieldName);

        $sql = new sql('idField', 'ISR_FIELD', '(TableName = ?) and (FieldName = ?)');
        $sql->SetParameters($table, $field);
        $query = $this->conn->getQuery($sql);

        if (!$query->eof)
        {
            return $query->fields('idfield');
        }

        $idField = $this->conn->db->GETNewId('seq_isr_field');
        $sql = new sql('idField,TableName,FieldName', 'ISR_FIELD');
        $sql->SetParameters($idField, $table, $field);
        $this->conn->Execute($sql->Insert());

        return $idField;
    }

    /**
     * Returns the id of a word in ISR_WORD, inserting the word when missing.
     *
     * @param string $word word to look up
     * @return mixed the word id
     */
    public function getIdWord($word)
    {
        return $this->findOrCreateWordId('ISR_WORD', $word);
    }

    /**
     * Returns the id of the phonetic form of a word in ISR_WORDFONO, inserting
     * it when missing. The word is fonetized here, so callers pass the plain word.
     *
     * @param string $word word to fonetize and look up
     * @return mixed the word id
     */
    public function getIdWordFono($word)
    {
        return $this->findOrCreateWordId('ISR_WORDFONO', $this->fonetize($word));
    }

    /**
     * Looks a word up in one of the word tables and creates it when absent.
     *
     * The word is bound as a parameter (never concatenated into the SQL), the
     * same way getIdField() binds its values. Both word tables draw their ids
     * from sequence seq_isr_word.
     *
     * @param string $table ISR_WORD or ISR_WORDFONO
     * @param string $word  value stored in the table's word column
     * @return mixed the word id
     */
    private function findOrCreateWordId($table, $word)
    {
        $sql = new sql('idWord', $table, '(word = ?)');
        $sql->SetParameters($word);
        $query = $this->conn->getQuery($sql);

        if (!$query->eof)
        {
            return $query->fields('idWord');
        }

        $idWord = $this->conn->db->GETNewId('seq_isr_word');
        $sql = new sql('idWord,Word', $table);
        $sql->SetParameters($idWord, $word);
        $this->conn->Execute($sql->Insert());

        return $idWord;
    }

    /**
     * Inserts one (idField, idWord, PK) row into an index table.
     *
     * @param string $table   ISR_INDEX or ISR_INDEXFONO
     * @param mixed  $idField id returned by getIdField()
     * @param mixed  $idWord  id returned by getIdWord()/getIdWordFono()
     * @param mixed  $pk      primary key of the indexed record
     */
    private function insertIndexEntry($table, $idField, $idWord, $pk)
    {
        $sql = new sql('idField, idWord, PK', $table);
        $sql->SetParameters($idField, $idWord, $pk);
        $this->conn->Execute($sql->Insert());
    }

    /**
     * Indexes the words of a phrase for one record.
     *
     * Words are upper-cased; stop words and words of 2 bytes or less are
     * skipped. A word (or phonetic form) that occurs several times in the
     * phrase is indexed once per call.
     *
     * @param string $tableName table owning the record
     * @param string $fieldname field the phrase comes from
     * @param mixed  $pk        primary key of the record
     * @param string $phrase    text to index
     * @param bool   $fono      also fill the phonetic index when true
     */
    public function indexer($tableName, $fieldname, $pk, $phrase, $fono = true)
    {
        $this->setStopWords();
        $idField = $this->getIdField($tableName, $fieldname);

        // Start from an empty list so a phrase without usable tokens is a no-op.
        $words = array();
        $this->getTokens($phrase, $words);

        $seenWords = array();
        $seenFonoIds = array();

        foreach ($words as $rawWord)
        {
            $word = strtoupper(trim($rawWord));

            if (!$this->isValid($word) || isset($seenWords[$word]))
            {
                continue;
            }
            $seenWords[$word] = true;

            $this->insertIndexEntry('ISR_INDEX', $idField, $this->getIdWord($word), $pk);

            if ($fono)
            {
                // Different words can share one phonetic form; index it once.
                $idFono = $this->getIdWordFono($word);
                $fonoKey = (string) $idFono;

                if (!isset($seenFonoIds[$fonoKey]))
                {
                    $seenFonoIds[$fonoKey] = true;
                    $this->insertIndexEntry('ISR_INDEXFONO', $idField, $idFono, $pk);
                }
            }
        }
    }

    /**
     * Removes the index entries of one record for one field, from both the
     * exact index and the phonetic index that indexer() fills.
     *
     * @param string $tableName table owning the record
     * @param string $fieldname indexed field
     * @param mixed  $pk        primary key of the record
     */
    public function delete($tableName, $fieldname, $pk)
    {
        $idField = $this->getIdField($tableName, $fieldname);

        foreach (array('ISR_INDEX', 'ISR_INDEXFONO') as $table)
        {
            $sql = new sql('', $table, '(idField = ?) and (pk = ?)');
            $sql->SetParameters($idField, $pk);
            $this->conn->Execute($sql->Delete());
        }
    }

    /**
     * Breaks a normalised search phrase into the terms to look up.
     *
     * Tokens are separated by single spaces. 'E' and 'OU' are operators
     * (INTERSECT and UNION); terms with no operator between them are joined
     * with INTERSECT. Other tokens shorter than 3 bytes are ignored.
     * Parentheses at the start/end of a token group terms; parentheses inside
     * a token are dropped. Operators with no term on one side are ignored and
     * parentheses are balanced, so the result always maps to well-formed SQL.
     *
     * @param string $phrase upper-cased, normalised phrase
     * @return array list of array('operator' => ''|'UNION'|'INTERSECT',
     *               'open' => int, 'word' => string, 'close' => int)
     */
    private function parseSearchTerms($phrase)
    {
        $terms = array();
        $pendingOperator = '';
        $depth = 0;

        foreach (explode(' ', (string) $phrase) as $token)
        {
            if ($token === '')
            {
                continue;
            }

            if (($token === 'E') || ($token === 'OU'))
            {
                // Remembered until the next term; the last one given wins.
                $pendingOperator = $token;
                continue;
            }

            if (strlen($token) < 3)
            {
                continue;
            }

            $word = str_replace(array('(', ')'), '', $token);

            if ($word === '')
            {
                continue;
            }

            $open = strspn($token, '(');
            $depth += $open;
            // Never close more groups than are currently open.
            $close = min(strlen($token) - strlen(rtrim($token, ')')), $depth);
            $depth -= $close;

            if (count($terms) == 0)
            {
                $operator = '';
            }
            else
            {
                $operator = ($pendingOperator === 'OU') ? 'UNION' : 'INTERSECT';
            }
            $pendingOperator = '';

            $terms[] = array('operator' => $operator, 'open' => $open, 'word' => $word, 'close' => $close);
        }

        if (($depth > 0) && (count($terms) > 0))
        {
            // Close the groups the phrase left open.
            $terms[count($terms) - 1]['close'] += $depth;
        }

        return $terms;
    }

    /**
     * Builds the final select over the target table for a list of keys.
     *
     * Keys are quoted (embedded single quotes doubled) and split into IN lists
     * of at most 101 values, joined with UNION. With no keys the query matches
     * only an empty-string key.
     *
     * @param array|Traversable $pks       key values found in the index
     * @param string            $tableName table to read from
     * @param string            $key       key column of that table
     * @param string            $fields    select list
     * @return string the SQL command
     */
    private function buildKeyQuery($pks, $tableName, $key, $fields)
    {
        $quoted = array();

        foreach ($pks as $pk)
        {
            $quoted[] = "'" . str_replace("'", "''", (string) $pk) . "'";
        }

        if (count($quoted) == 0)
        {
            return "select $fields from $tableName where $key in ('')";
        }

        $selects = array();

        foreach (array_chunk($quoted, 101) as $chunk)
        {
            $selects[] = "select $fields from $tableName where $key in (" . implode(',', $chunk) . ")";
        }

        return implode(' UNION ', $selects);
    }

    /**
     * Searches the index for a phrase and returns a query over the matching
     * records of $tableName.
     *
     * The phrase is upper-cased and passed through the MPhonetic
     * removeMultiple/removeStrange/removeAccentuation helpers before being
     * parsed (see parseSearchTerms() for the phrase syntax).
     *
     * NOTE(review): getIdWord()/getIdWordFono() insert words they do not find,
     * so searching for an unknown word still adds a row to the word table.
     *
     * @param string $tableName table whose records were indexed
     * @param string $fieldName indexed field
     * @param string $phrase    search phrase
     * @param string $key       key column of $tableName matched against the indexed PK
     * @param string $fields    select list for the result query
     * @param bool   $fono      search the phonetic index when true, the exact index otherwise
     * @return mixed whatever $conn->getQueryCommand() returns for the final select
     */
    public function retrieve($tableName, $fieldName, $phrase, $key, $fields = '*', $fono = true)
    {
        $phrase = trim(strtoupper($phrase));
        $phrase = $this->phonetic->removeMultiple($phrase);
        $phrase = $this->phonetic->removeStrange($phrase);
        $phrase = $this->phonetic->removeAccentuation($phrase);

        $idField = $this->getIdField($tableName, $fieldName);
        $indexTable = $fono ? 'ISR_INDEXFONO' : 'ISR_INDEX';
        $cmd = '';

        foreach ($this->parseSearchTerms($phrase) as $term)
        {
            // getIdWordFono() fonetizes the word itself, exactly as indexer()
            // relies on, so the plain word is passed here.
            $idWord = $fono ? $this->getIdWordFono($term['word']) : $this->getIdWord($term['word']);
            $select = "select PK from $indexTable where (IdWord = $idWord) and (IdField = $idField )";

            if ($term['operator'] !== '')
            {
                $cmd .= ' ' . $term['operator'] . ' ';
            }

            $cmd .= str_repeat('(', $term['open']) . "($select)" . str_repeat(')', $term['close']);
        }

        $pks = array();

        // Without any usable term there is nothing to run against the index.
        if ($cmd !== '')
        {
            $query = $this->conn->getQueryCommand($cmd);
            $check = $query->chunkResult(0, 0);

            if (is_array($check) || ($check instanceof Traversable))
            {
                $pks = $check;
            }
        }

        return $this->conn->getQueryCommand($this->buildKeyQuery($pks, $tableName, $key, $fields));
    }

    /**
     * Returns the phonetic form of a word, as produced by MPhonetic::fonetize().
     *
     * @param string $word word to convert
     * @return mixed result of $this->phonetic->fonetize($word)
     */
    public function fonetize($word)
    {
        return $this->phonetic->fonetize($word);
    }
}
00421 ?>