/usr/local/miolo2/classes/database/misr.class

Go to the documentation of this file.
00001 <?php
/**
 * MISR - keyword index over database text fields.
 *
 * Phrases are split into words; every accepted word is registered in
 * ISR_WORD (and, optionally, its phonetic form in ISR_WORDFONO) and linked to
 * a (table, field, primary key) triple through ISR_INDEX / ISR_INDEXFONO.
 * retrieve() turns a search phrase into a set query over those index tables
 * and then selects the matching rows from the indexed table.
 *
 * Relies on the project classes `sql` and `MPhonetic` and on a connection
 * object exposing getQuery(), getQueryCommand(), Execute() and ->db->GETNewId().
 */
class MISR
{
    /** Connection object used for every query issued by this class. */
    public $conn;

    /** MPhonetic helper used to normalise search phrases and fonetize words. */
    public $phonetic;

    /** Upper-case words that must never be indexed; filled by setStopWords(). */
    public $stopWords;

    /**
     * @param object $conn connection object (see the class comment for the members used)
     */
    public function __construct($conn)
    {
        $this->conn = $conn;
        $this->phonetic = new MPhonetic();
    }

    /**
     * (Re)loads the built-in Portuguese stop-word list into $this->stopWords.
     *
     * NOTE(review): indexer() upper-cases words with strtoupper(), which is
     * byte-oriented; whether accented entries below can ever match depends on
     * the file encoding/locale - confirm before relying on them.
     * NOTE(review): 'TÉM' looks like a typo for 'TÊM'; kept as found.
     */
    private function setStopWords()
    {
        $this->stopWords = array(
            'QUE', 'PARA', 'COM', 'NÃO', 'UMA', 'POR', 'MAIS', 'DOS',
            'COMO', 'MAS', 'FOI', 'ELE', 'DAS', 'TEM', 'SEU', 'SUA',
            'SER', 'QUANDO', 'MUITO', 'NOS', 'ESTÁ', 'TAMBÉM', 'PELO', 'PELA',
            'ATÉ', 'ISSO', 'ELA', 'ENTRE', 'ERA', 'DEPOIS', 'SEM', 'MESMO',
            'AOS', 'SEUS', 'QUEM', 'NAS', 'ESSE', 'ESTÃO', 'VOCÊ', 'ESSA',
            'NUM', 'NEM', 'SUAS', 'MEU', 'MINHA', 'NUMA', 'PELOS', 'ELAS',
            'HAVIA', 'SEJA', 'QUAL', 'NÓS', 'LHE', 'DELES', 'ESSAS', 'ESSES',
            'PELAS', 'ESTE', 'DELE', 'VOCÊS', 'VOS', 'LHES', 'MEUS', 'MINHAS',
            'TEU', 'TUA', 'TEUS', 'TUAS', 'NOSSO', 'NOSSA', 'NOSSOS', 'NOSSAS',
            'DELA', 'DELAS', 'ESTA', 'ESTES', 'ESTAS', 'AQUELE', 'AQUELA', 'AQUELES',
            'AQUELAS', 'ISTO', 'AQUILO', 'ESTOU', 'ESTÁ', 'ESTAMOS', 'ESTÃO', 'ESTIVE',
            'ESTEVE', 'ESTIVEMOS', 'ESTIVERAM', 'ESTAVA', 'ESTÁVAMOS', 'ESTAVAM', 'ESTIVERA', 'ESTIVÉRAMOS',
            'ESTEJA', 'ESTEJAMOS', 'ESTEJAM', 'ESTIVESSE', 'ESTIVÉSSEMOS', 'ESTIVESSEM', 'ESTIVER', 'ESTIVERMOS',
            'ESTIVEREM', 'HEI', 'HAVEMOS', 'HÃO', 'HOUVE', 'HOUVEMOS', 'HOUVERAM', 'HOUVERA',
            'HOUVÉRAMOS', 'HAJA', 'HAJAMOS', 'HAJAM', 'HOUVESSE', 'HOUVÉSSEMOS', 'HOUVESSEM', 'HOUVER',
            'HOUVERMOS', 'HOUVEREM', 'HOUVEREI', 'HOUVERÁ', 'HOUVEREMOS', 'HOUVERÃO', 'HOUVERIA', 'HOUVERÍAMOS',
            'HOUVERIAM', 'SOU', 'SOMOS', 'SÃO', 'ERA', 'ÉRAMOS', 'ERAM', 'FUI',
            'FOI', 'FOMOS', 'FORAM', 'FORA', 'FÔRAMOS', 'SEJA', 'SEJAMOS', 'SEJAM',
            'FOSSE', 'FÔSSEMOS', 'FOSSEM', 'FOR', 'FORMOS', 'FOREM', 'SEREI', 'SERÁ',
            'SEREMOS', 'SERÃO', 'SERIA', 'SERÍAMOS', 'SERIAM', 'TENHO', 'TEM', 'TEMOS',
            'TÉM', 'TINHA', 'TÍNHAMOS', 'TINHAM', 'TIVE', 'TEVE', 'TIVEMOS', 'TIVERAM',
            'TIVERA', 'TIVÉRAMOS', 'TENHA', 'TENHAMOS', 'TENHAM', 'TIVESSE', 'TIVÉSSEMOS', 'TIVESSEM',
            'TIVER', 'TIVERMOS', 'TIVEREM', 'TEREI', 'TERÁ', 'TEREMOS', 'TERÃO', 'TERIA',
            'TERÍAMOS', 'TERIAM'
        );
    }

    /**
     * Tells whether a word should be indexed: longer than two bytes and not a stop word.
     *
     * @param string $word upper-cased candidate word
     * @return bool
     */
    private function isValid($word)
    {
        if (strlen($word) <= 2)
        {
            return false;
        }

        // Guard against being called before the list was loaded.
        if (!is_array($this->stopWords))
        {
            $this->setStopWords();
        }

        return !in_array($word, $this->stopWords, true);
    }

    /**
     * Appends to $array every space-separated token of $string longer than two bytes.
     *
     * @param string $string text to split
     * @param array  $array  receives the tokens (created when not yet an array)
     */
    private function getTokens($string, &$array)
    {
        if (!is_array($array))
        {
            $array = array();
        }

        // explode() instead of a strtok() loop: a truthiness test on the
        // token would stop at a literal "0" and drop every following word.
        foreach (explode(' ', (string) $string) as $piece)
        {
            $piece = trim($piece);

            if (strlen($piece) > 2)
            {
                $array[] = $piece;
            }
        }
    }

    /**
     * Returns the ISR_FIELD id for a table/field pair, registering the pair
     * (with a new id from sequence 'seq_isr_field') when it is not there yet.
     *
     * @param string $tableName table name (stored upper-cased)
     * @param string $fieldName field name (stored upper-cased)
     * @return mixed id of the field entry
     */
    public function getIdField($tableName, $fieldName)
    {
        $sql = new sql('idField', 'ISR_FIELD', '(TableName = ?) and (FieldName = ?)');
        $sql->SetParameters(strtoupper($tableName), strtoupper($fieldName));
        $query = $this->conn->getQuery($sql);

        if (!$query->eof)
        {
            return $query->fields('idfield');
        }

        $idField = $this->conn->db->GETNewId('seq_isr_field');
        $sql = new sql('idField,TableName,FieldName', 'ISR_FIELD');
        $sql->SetParameters($idField, strtoupper($tableName), strtoupper($fieldName));
        $this->conn->Execute($sql->Insert());

        return $idField;
    }

    /**
     * Looks a word up in the given word table and inserts it (with a new id
     * from sequence 'seq_isr_word') when missing.
     *
     * The word is passed as a bound parameter, like getIdField() does, so
     * quotes inside it can neither break nor alter the statement.
     *
     * @param string $table word table: 'ISR_WORD' or 'ISR_WORDFONO'
     * @param string $word  value stored in / compared with the `word` column
     * @return mixed id of the word
     */
    private function findOrCreateWordId($table, $word)
    {
        $sql = new sql('idWord', $table, '(word = ?)');
        $sql->SetParameters($word);
        $query = $this->conn->getQuery($sql);

        if (!$query->eof)
        {
            return $query->fields('idWord');
        }

        $idWord = $this->conn->db->GETNewId('seq_isr_word');
        $sql = new sql('idWord,Word', $table);
        $sql->SetParameters($idWord, $word);
        $this->conn->Execute($sql->Insert());

        return $idWord;
    }

    /**
     * Returns the ISR_WORD id of a word, creating the entry when missing.
     *
     * @param string $word word exactly as it should be stored
     * @return mixed id of the word
     */
    public function getIdWord($word)
    {
        return $this->findOrCreateWordId('ISR_WORD', $word);
    }

    /**
     * Returns the ISR_WORDFONO id of the phonetic form of a word, creating
     * the entry when missing.
     *
     * @param string $word plain (not yet fonetized) word
     * @return mixed id of the phonetic word
     */
    public function getIdWordFono($word)
    {
        return $this->findOrCreateWordId('ISR_WORDFONO', $this->fonetize($word));
    }

    /**
     * Indexes the words of $phrase for one row of a table field.
     *
     * Each token longer than two bytes that is not a stop word gets a row in
     * ISR_INDEX and, when $fono is true, a row in ISR_INDEXFONO as well.
     *
     * @param string $tableName table owning the text
     * @param string $fieldname field owning the text
     * @param mixed  $pk        primary key of the row the text belongs to
     * @param string $phrase    text to index
     * @param bool   $fono      also maintain the phonetic index
     */
    public function indexer($tableName, $fieldname, $pk, $phrase, $fono = true)
    {
        $this->setStopWords();
        $idField = $this->getIdField($tableName, $fieldname);

        // Start from an empty list so a phrase without usable tokens is a no-op.
        $words = array();
        $this->getTokens($phrase, $words);

        foreach ($words as $token)
        {
            $word = strtoupper(trim($token));

            if (!$this->isValid($word))
            {
                continue;
            }

            $idWord = $this->getIdWord($word);
            $sql = new sql('idField, idWord, PK', 'ISR_INDEX');
            $sql->SetParameters($idField, $idWord, $pk);
            $this->conn->Execute($sql->Insert());

            if ($fono)
            {
                $idWord = $this->getIdWordFono($word);
                $sql = new sql('idField, idWord, PK', 'ISR_INDEXFONO');
                $sql->SetParameters($idField, $idWord, $pk);
                $this->conn->Execute($sql->Insert());
            }
        }
    }

    /**
     * Removes every index entry of one row of a table field.
     *
     * Both the plain and the phonetic index are cleared; leaving
     * ISR_INDEXFONO behind would keep deleted rows visible to phonetic searches.
     *
     * @param string $tableName table owning the text
     * @param string $fieldname field owning the text
     * @param mixed  $pk        primary key of the row being removed from the index
     */
    public function delete($tableName, $fieldname, $pk)
    {
        $idField = $this->getIdField($tableName, $fieldname);

        foreach (array('ISR_INDEX', 'ISR_INDEXFONO') as $indexTable)
        {
            $sql = new sql('', $indexTable, '(idField = ?) and (pk = ?)');
            $sql->SetParameters($idField, $pk);
            $this->conn->Execute($sql->Delete());
        }
    }

    /**
     * Searches the index and returns a query over the matching rows.
     *
     * The phrase is upper-cased and normalised through MPhonetic, then read
     * word by word: 'OU' becomes UNION, 'E' becomes INTERSECT, other words
     * shorter than three bytes are ignored, and every remaining word becomes
     * a sub-select on the index table (adjacent words without an operator are
     * combined with INTERSECT). Parentheses typed around words are kept.
     * The resulting keys are then fetched from $tableName in groups.
     *
     * @param string $tableName indexed table
     * @param string $fieldName indexed field
     * @param string $phrase    search expression
     * @param string $key       key column of $tableName matched against the indexed PK
     * @param string $fields    column list to select
     * @param bool   $fono      search the phonetic index instead of the plain one
     * @return mixed result of conn->getQueryCommand() for the final select
     */
    public function retrieve($tableName, $fieldName, $phrase, $key, $fields = '*', $fono = true)
    {
        $phrase = trim(strtoupper($phrase));
        $phrase = $this->phonetic->removeMultiple($phrase);
        $phrase = $this->phonetic->removeStrange($phrase);
        $phrase = $this->phonetic->removeAccentuation($phrase);

        $idField = $this->getIdField($tableName, $fieldName);
        $phrase .= ' '; // sentinel so the last word is flushed by the loop
        $n = strlen($phrase);
        $w = '';
        $cmd = '';
        $last = ''; // previous emitted element; tells whether an operator was given

        for ($i = 0; $i < $n; $i++)
        {
            $c = $phrase[$i];

            if ($c != chr(32))
            {
                $w .= $c;
                continue;
            }

            if ($w == '')
            {
                continue;
            }

            if (strlen($w) < 3)
            {
                if (($w == 'OU') || ($w == 'E'))
                    $last = $w;
                else
                    $w = '';
            }
            else
            {
                $word = str_replace('(', '', str_replace(')', '', $w));

                if ($fono)
                {
                    // getIdWordFono() fonetizes the word itself, exactly as
                    // indexer() relies on; fonetizing here as well would look
                    // up a different key than the one that was indexed.
                    $idWord = $this->getIdWordFono($word);
                    $w = str_replace($word, "select PK from ISR_INDEXFONO where (IdWord = $idWord) and (IdField = $idField )", $w);
                }
                else
                {
                    $idWord = $this->getIdWord($word);
                    $w = str_replace($word, "select PK from ISR_INDEX where (IdWord = $idWord) and (IdField = $idField )", $w);
                }

                // No explicit operator between two terms means "E" (INTERSECT).
                $last = $w = (($last != 'E') && ($last != 'OU') && strlen($cmd)) ? "E ($w)" : "($w)";
            }

            $cmd .= " $w ";
            $w = '';
        }

        $cmd = str_replace(' OU ', ' UNION ', $cmd);
        $cmd = str_replace(' E ', ' INTERSECT ', $cmd);

        // Nothing searchable in the phrase: do not send an empty statement.
        $check = array();

        if (trim($cmd) != '')
        {
            $query = $this->conn->getQueryCommand($cmd);
            $check = $query->chunkResult(0, 0);

            if (!is_array($check) && !($check instanceof Traversable))
            {
                $check = array();
            }
        }

        $n = 0;
        $s = '';
        $cmd = '';

        foreach ($check as $pk)
        {
            $s .= (($s != '') ? ',' : '') . "'$pk'";

            // Split the key list into several selects joined by UNION.
            if ($n++ == 100)
            {
                $cmd .= (($cmd != '') ? ' UNION ' : '') . "select $fields from $tableName where $key in ($s)";
                $n = 0;
                $s = '';
            }
        }

        if ($n != 0)
        {
            $cmd .= (($cmd != '') ? ' UNION ' : '') . "select $fields from $tableName where $key in ($s)";
        }

        if ($cmd == '')
            $cmd = "select $fields from $tableName where $key in ('')";

        return $this->conn->getQueryCommand($cmd);
    }

    /**
     * Returns the phonetic form of a word, as produced by MPhonetic::fonetize().
     *
     * @param string $word
     * @return mixed
     */
    public function fonetize($word)
    {
        return $this->phonetic->fonetize($word);
    }
}
00421 ?>
CopyLeft (L) 2001-2006 - [MIOLO Development Team] SOLIS - Cooperativa de Soluções Livres - Lajeado/RS - Brasil