Mysql
 sql >> Teknologi Basis Data >  >> RDS >> Mysql

Mengurai kueri Pencarian Teks Lengkap (Full-Text Search) yang diketik pengguna menjadi klausa WHERE MySQL menggunakan PHP

Kode berikut terdiri dari kelas Tokenizer, Token, dan QueryBuilder. Ini mungkin bukan solusi paling elegan yang pernah ada, tetapi kode ini benar-benar melakukan apa yang Anda minta:

<?php
// Demo driver: turns a user-typed search string into a MySQL WHERE clause
// and prints it. The Token, Tokenizer and QueryBuilder classes it relies on
// are declared further down in this file.
//
// QueryBuilder Grammar (as implemented by QueryBuilder):
// ======================================================
// SearchRule       := [ SimpleSearchRule ] { KeyWord }
// SimpleSearchRule := Expression { 'OR' Expression }
// Expression       := SimpleExpression { 'AND' SimpleExpression }
// SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression

$input = 'from:me AND (to:john OR to:jenny) dinner party';

// Maps the field names a user may type (array keys) to column names (values).
$fieldMapping = array(
    'id' => 'id',
    'from' => 'from',
    'to' => 'to',
    'title' => 'title',
    'description' => 'description',
    'time_created' => 'time_created'
);
// Columns searched by the MATCH ... AGAINST clause built from free keywords.
$fullTextFields = array('title','description');

$qb = new QueryBuilder($fieldMapping, $fullTextFields);
try {
    echo $qb->parseSearchRule($input);
} catch(Exception $error) {
    // Parser messages embed raw pieces of the search string, and this line is
    // emitted as HTML, so the message is escaped before being printed.
    echo 'Error occurred while parsing search query: <br/>'
        .htmlspecialchars($error->getMessage(), ENT_QUOTES, 'UTF-8');
}

/**
 * A single lexical unit of a search query: a category, the text it was built
 * from and the position value given by whoever created it.
 */
class Token {
    // Token categories. Each constant's value is identical to its name.
    const KEYWORD = 'KEYWORD';
    const OPEN_PAR = 'OPEN_PAR';
    const CLOSE_PAR = 'CLOSE_PAR';
    const FIELD = 'FIELD';
    const AND_OP = 'AND_OP';
    const OR_OP = 'OR_OP';

    /** Category of the token; callers compare it against the constants above. */
    public $type;
    /** Text stored for the token. */
    public $chars;
    /** Position value stored for the token. */
    public $position;

    /**
     * Stores the three values unchanged.
     *
     * @param mixed $type     token category
     * @param mixed $chars    token text
     * @param mixed $position position of the token
     */
    public function __construct($type, $chars, $position) {
        $this->position = $position;
        $this->chars = $chars;
        $this->type = $type;
    }

    /**
     * Debug representation listing all three properties.
     *
     * @return string
     */
    public function __toString() {
        return "Token[ type={$this->type}, chars={$this->chars}, position={$this->position} ]";
    }
}

/**
 * Cuts a search string into Token objects on demand and offers one token of
 * lookahead (getToken() peeks, consumeToken() advances).
 */
class Tokenizer {
    /** Holds the token that has been peeked at but not consumed yet. */
    private $tokens = array();
    /** The search string, with surrounding whitespace trimmed. */
    private $input;
    /** Offset into $input at which the next scan starts. */
    private $currentPosition;

    /**
     * @param string $input raw search string
     */
    public function __construct($input) {
        $this->currentPosition = 0;
        $this->input = trim($input);
    }

    /**
     * Returns the upcoming token without consuming it.
     *
     * @return Token|null null when no token is left
     */
    public function getToken() {
        if (empty($this->tokens)) {
            $scanned = $this->nextToken();
            if ($scanned === null) {
                return null;
            }
            $this->tokens[] = $scanned;
        }
        return $this->tokens[0];
    }

    /**
     * Returns the upcoming token and removes it from the lookahead buffer.
     *
     * @return Token|null null when no token is left
     */
    public function consumeToken() {
        $head = $this->getToken();
        if ($head !== null) {
            array_shift($this->tokens);
        }
        return $head;
    }

    /**
     * Scans one token starting at the current position and advances past it.
     *
     * Match order: parentheses, field expression ("name:value"), the AND / OR
     * operators (only when followed by whitespace), then a plain keyword.
     *
     * @return Token|null null when only whitespace (or nothing) remains
     * @throws Exception when the remaining input matches none of the patterns
     */
    protected function nextToken() {
        // Skip leading whitespace before looking for a token.
        $rest = substr($this->input, $this->currentPosition);
        if (preg_match('/^\s+/', $rest, $gap)) {
            $this->currentPosition += strlen($gap[0]);
            $rest = substr($this->input, $this->currentPosition);
        }
        // Loose comparison kept on purpose so that a non-string result from
        // substr() is treated as "nothing left" as well.
        if ($rest == '') {
            return null;
        }

        $start = $this->currentPosition;
        $first = substr($rest, 0, 1);
        if ($first === '(' || $first === ')') {
            $this->currentPosition = $start + 1;
            $parenType = ($first === '(') ? Token::OPEN_PAR : Token::CLOSE_PAR;
            return new Token($parenType, $first, $start);
        }

        // One or more characters that are not ':', whitespace or a parenthesis.
        $word = '[^\:\s\(\)]+';
        if (preg_match('/^('.$word.')\:('.$word.')/', $rest, $hit)) {
            $type = Token::FIELD;
            $chars = $hit[0];
        } elseif (preg_match('/^AND\s/', $rest)) {
            $type = Token::AND_OP;
            $chars = 'AND';
        } elseif (preg_match('/^OR\s/', $rest)) {
            $type = Token::OR_OP;
            $chars = 'OR';
        } elseif (preg_match('/^('.$word.')/', $rest, $hit)) {
            $type = Token::KEYWORD;
            $chars = $hit[0];
        } else {
            throw new Exception('Unable to tokenize: '.$rest);
        }

        // Advance by the length of the token text (3 for AND, 2 for OR).
        $this->currentPosition = $start + strlen($chars);
        return new Token($type, $chars, $start);
    }
}

/**
 * Translates a user-typed search string into a MySQL WHERE-clause fragment.
 *
 * Grammar handled (one parse method per rule):
 *   SearchRule       := [ SimpleSearchRule ] { KeyWord }
 *   SimpleSearchRule := Expression { 'OR' Expression }
 *   Expression       := SimpleExpression { 'AND' SimpleExpression }
 *   SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression
 *
 * A field expression ("name:value") becomes `column` = 'value'. Trailing
 * keywords are collected into a single
 * MATCH (...) AGAINST ('...' IN BOOLEAN MODE) clause over the full-text columns.
 */
class QueryBuilder {
    /** Map of user-visible field name => column name. */
    private $fieldMapping;
    /** List of column names used in the MATCH (...) clause. */
    private $fulltextFields;
    /** Optional callable used to escape string values, or null. */
    private $escaper;

    /**
     * @param array         $fieldMapping   user-visible field name => column name
     * @param array         $fulltextFields column names for the MATCH clause
     * @param callable|null $escaper        optional function taking a string and
     *        returning it escaped for use inside a single-quoted SQL literal,
     *        e.g. array($mysqli, 'real_escape_string'). When omitted, see
     *        escapeValue() for the fallback behaviour.
     * @throws InvalidArgumentException when $escaper is given but not callable
     */
    function __construct($fieldMapping, $fulltextFields, $escaper = null) {
        if ($escaper !== null && !is_callable($escaper)) {
            throw new InvalidArgumentException('Escaper must be a callable or null.');
        }
        $this->fieldMapping = $fieldMapping;
        $this->fulltextFields = $fulltextFields;
        $this->escaper = $escaper;
    }

    /**
     * Parses a complete search string.
     *
     * @param string $input raw search string typed by the user
     * @return string WHERE-clause fragment; '' when the input has no tokens
     * @throws Exception on syntax errors, unknown fields or untokenizable input
     */
    function parseSearchRule($input) {
        $t = new Tokenizer($input);
        $token = $t->getToken();
        if ($token === null) {
            return '';
        }

        // A query that starts with a keyword has no structured part at all.
        $searchRule = '';
        if ($token->type !== Token::KEYWORD) {
            $searchRule = $this->parseSimpleSearchRule($t);
        }

        // Everything left after the structured part must be plain keywords.
        $keywords = array();
        while (($token = $t->consumeToken()) !== null) {
            if ($token->type !== Token::KEYWORD) {
                throw new Exception('Only keywords allowed at end of search rule.');
            }
            $keywords[] = $token->chars;
        }
        if (count($keywords) === 0) {
            return $searchRule;
        }

        $matchClause = 'MATCH (`'.implode('`,`', $this->fulltextFields).'`) AGAINST ('
            .'\''.$this->escapeValue(implode(' ', $keywords)).'\' IN BOOLEAN MODE)';
        if ($searchRule === '') {
            return $matchClause;
        }
        return '('.$searchRule.') AND ('.$matchClause.')';
    }

    /**
     * SimpleSearchRule := Expression { 'OR' Expression }
     *
     * @return string the parsed expressions joined with ' OR '
     * @throws Exception propagated from the nested rules
     */
    protected function parseSimpleSearchRule(Tokenizer $t) {
        $expressions = array();
        do {
            $expressions[] = $this->parseExpression($t);
            $repeat = $this->skipIfNext($t, Token::OR_OP);
        } while ($repeat);
        return implode(' OR ', $expressions);
    }

    /**
     * Expression := SimpleExpression { 'AND' SimpleExpression }
     *
     * @return string the parsed expressions joined with ' AND '
     * @throws Exception propagated from the nested rules
     */
    protected function parseExpression(Tokenizer $t) {
        $expressions = array();
        do {
            $expressions[] = $this->parseSimpleExpression($t);
            $repeat = $this->skipIfNext($t, Token::AND_OP);
        } while ($repeat);
        return implode(' AND ', $expressions);
    }

    /**
     * SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression
     *
     * @return string a parenthesised sub-rule or a `column` = 'value' comparison
     * @throws Exception on end of input, a missing ')', an unknown field name
     *         or any token other than '(' / a field expression
     */
    protected function parseSimpleExpression(Tokenizer $t) {
        $token = $t->consumeToken();
        // consumeToken() yields null at end of input; report that explicitly
        // instead of reading properties of null.
        if ($token === null) {
            throw new Exception('Expected opening parenthesis or field-expression, found: end of input');
        }

        if ($token->type === Token::OPEN_PAR) {
            $spr = $this->parseSimpleSearchRule($t);
            $closing = $t->consumeToken();
            if ($closing === null || $closing->type !== Token::CLOSE_PAR) {
                throw new Exception('Expected closing parenthesis, found: '.$this->describeToken($closing));
            }
            return '('.$spr.')';
        }

        if ($token->type === Token::FIELD) {
            // Split on the first ':' only, so the value keeps any later text.
            $fieldVal = explode(':', $token->chars, 2);
            if (isset($this->fieldMapping[$fieldVal[0]])) {
                return '`'.$this->fieldMapping[$fieldVal[0]].'` = \''.$this->escapeValue($fieldVal[1]).'\'';
            }
            throw new Exception('Unknown field selected: '.$token->chars);
        }

        throw new Exception('Expected opening parenthesis or field-expression, found: '.$token->chars);
    }

    /**
     * Escapes a value for embedding inside a single-quoted SQL string literal.
     *
     * Order of preference: the escaper passed to the constructor, then
     * mysql_real_escape_string() when that function exists, then a built-in
     * replacement table (backslash, NUL, newline, carriage return, both quote
     * characters and Ctrl-Z).
     *
     * NOTE(review): the built-in table knows nothing about the connection
     * character set or SQL mode; pass a connection-aware escaper for
     * production use.
     *
     * @param string $value raw value taken from the search string
     * @return string escaped value
     */
    protected function escapeValue($value) {
        if ($this->escaper !== null) {
            return call_user_func($this->escaper, $value);
        }
        if (function_exists('mysql_real_escape_string')) {
            return mysql_real_escape_string($value);
        }
        return strtr($value, array(
            "\\" => "\\\\",
            "\0" => "\\0",
            "\n" => "\\n",
            "\r" => "\\r",
            "'" => "\\'",
            '"' => '\\"',
            "\x1a" => "\\Z",
        ));
    }

    /**
     * Consumes the upcoming token when it has the given type.
     *
     * @return bool true when a token of that type was consumed
     */
    private function skipIfNext(Tokenizer $t, $type) {
        $token = $t->getToken();
        if ($token === null || $token->type !== $type) {
            return false;
        }
        $t->consumeToken();
        return true;
    }

    /**
     * Text used for a token inside error messages; null means end of input.
     *
     * @return string
     */
    private function describeToken($token) {
        return $token === null ? 'end of input' : $token->chars;
    }
}
?>

Solusi yang lebih tepat adalah membangun pohon parse terlebih dahulu, lalu, setelah analisis lebih lanjut, mengubahnya menjadi kueri.



  1. Database
  2.   
  3. Mysql
  4.   
  5. Oracle
  6.   
  7. Sqlserver
  8.   
  9. PostgreSQL
  10.   
  11. Access
  12.   
  13. SQLite
  14.   
  15. MariaDB
  1. Cetak info debug dari prosedur tersimpan di MySQL

  2. Apakah VARCHAR(20000) valid di MySQL?

  3. Mengurutkan subpohon dalam struktur data hierarki tabel penutupan

  4. Kolom yang dipilih tidak memiliki tipe yang kompatibel, meskipun memiliki tipe yang sama

  5. Apakah Diucapkan "S-Q-L" atau "Sekuel"?