15: Lexical Scanner Implementation (5)

2018/10/30

Finally, the scanner can identify multikeywords (keywords made of several words), such as insert into and create table.

The algorithm for the scanner is really straightforward. Check the first word to see whether it could be the start of a multikeyword. If so, read the following words and check whether the resulting string matches a multikeyword; for example, if the keyword has three words, read the following two words.

The algorithm looks somewhat ugly. I would like to refactor it later.

There are also tests for the scanner; take a look at the bottom of lexer.rs.

sql/lexer.rs

// if this is possible a multikeyword, search the following chars
match symbol::check_multi_keywords_front(word) {
    // parts<Vec[u32]> for how many parts in this possible keyword
    Some(parts) => {
        println!("The word `{}` might be a multikeyword", word);

        for keyword_total_parts in parts {
            println!("Assume this keyword has {} parts", keyword_total_parts);

            // copy remaining chars for testing
            let mut test_chars = chars.as_str().chars();
            // for testing if the string a multikeyword. Insert the first word
            // and a space already. (because start scanning from next word)
            let mut test_str = String::from(format!("{} ", word));

            // for checking a new word
            let mut is_last_letter = false;

            // record the right cursor position when checking if multikeyword
            // if match a multikeyword, shift right cursor with steps
            let mut step_counter = 0;

            // How many words added in the test_str
            // if the keyword is 3 parts, the following_parts should be 2
            let mut following_parts = 0;

            loop {
                match test_chars.next() {
                    Some(y) => {
                        // A multikeyword should be all ASCII alphabetic character
                        if y.is_ascii_alphabetic() {
                            if !is_last_letter {
                                is_last_letter = true;
                            }
                            test_str.push(y);
                        } else {
                            match y {
                                ' ' | '\t' | '\r' | '\n' => {
                                    if is_last_letter {
                                        // from letter to space, count one
                                        following_parts += 1;
                                        // find enough parts, break earlier
                                        if following_parts
                                            == keyword_total_parts - 1
                                        {
                                            break; // loop
                                        }
                                        // add ` ` between words
                                        test_str.push(' ');
                                        is_last_letter = false
                                    }
                                }
                                // &, %, *, @, etc.
                                // keywords must be letters
                                _ => break, // loop
                            }
                        }
                    }
                    None => break, // loop
                }
                step_counter += 1;
            }

            println!("Checking `{}` ...", test_str);
            match symbol::SYMBOLS.get(test_str.as_str()) {
                // a multikeyword
                Some(token) => {
                    println!("Found keyword `{}`", test_str);
                    self.tokens.push(token.clone());

                    // shift the right cursor to the right of multikeyword
                    self.pos.cursor_r += step_counter;
                    // skip the chars included in this multikeyword
                    for _ in 0..step_counter {
                        chars.next();
                    }

                    is_multi_keyword = true;
                    break; // parts
                }
                None => println!("`{}` not a keyword", test_str),
            }
        }
    }
    None => {}
}

results matching ""

    No results matching ""