ThinkChat2.0新版上线,更智能更精彩,支持会话、画图、阅读、搜索等,送10W Token,即刻开启你的AI之旅 广告
[TOC]

## 概述

- 提取关键字和变量名
- 提取字符串
- 提取操作符
- 提取数字

## 方式

### 提取关键词或变量

**状态机描述:**

![](https://img.kancloud.cn/91/e6/91e67d23073c1dfce9cce73e43f56568_800x195.png)

<details>
<summary>token.java</summary>

```java
/**
 * Reads consecutive characters accepted by {@code AlphabetHelper.isLetter} and
 * classifies the collected text.
 *
 * <p>The first character that is not accepted is only peeked, never consumed,
 * so it stays available for the next token.
 *
 * @param it character stream positioned at the first character of the word
 * @return a KEYWORD token if {@code KeyWords.isKeyWord} accepts the text, a
 *         BOOLEAN token for "true"/"false", otherwise a VARIABLE token
 */
public static Token makeVarOrKeyWord(PeekIterator<Character> it) {
    StringBuilder word = new StringBuilder();

    // Collect characters for as long as they are letters.
    while (it.hasNext()) {
        Character lookahead = it.peek();
        if (!AlphabetHelper.isLetter(lookahead)) {
            break;
        }
        word.append(lookahead);
        it.next();
    }

    String s = word.toString();

    // Keywords take precedence over boolean literals and plain variables.
    if (KeyWords.isKeyWord(s)) {
        return new Token(TokenType.KEYWORD, s);
    }
    if (s.equals("true") || s.equals("false")) {
        return new Token(TokenType.BOOLEAN, s);
    }
    return new Token(TokenType.VARIABLE, s);
}
```

</details>

<br />

### 提取字符串

**状态机描述:**

![](https://img.kancloud.cn/57/5f/575f430b105d529527df7c7ea5543047_400x187.png)

- 如果两边都为单引号或双引号,则为字符串

<details>
<summary>token.java</summary>

```java
/**
 * Scans a string literal delimited by a matching pair of single or double quotes.
 *
 * <p>States: 0 = before the opening quote, 1 = inside a single-quoted string,
 * 2 = inside a double-quoted string. The returned token value includes both quotes.
 *
 * @param it character stream; presumably positioned at the opening quote
 *           (any other character seen in state 0 is simply collected) - TODO confirm with caller
 * @return a STRING token containing the literal with its quotes
 * @throws LexicalException if the input ends before the closing quote is found
 */
public static Token makeString(PeekIterator<Character> it) throws LexicalException {
    StringBuilder s = new StringBuilder();
    int state = 0;

    while (it.hasNext()) {
        char c = it.next();
        switch (state) {
            case 0:
                if (c == '\'') {
                    state = 1;
                } else if (c == '\"') {
                    state = 2;
                }
                s.append(c);
                break;
            case 1:
                if (c == '\'') {
                    return new Token(TokenType.STRING, s.toString() + c);
                } else {
                    s.append(c);
                }
                break;
            case 2:
                if (c == '\"') {
                    return new Token(TokenType.STRING, s.toString() + c);
                } else {
                    s.append(c);
                }
                break;
        }
    } // end while

    // Reached only when the input ends before the closing quote.
    throw new LexicalException("Unexpected error");
}
```

</details>

<br />

### 提取操作符

**状态机描述:**

<details>
<summary>点击打开状态机描述</summary>

![](https://img.kancloud.cn/56/02/5602757aea59d814fae73b46fc2d1544_1078x5260.png)

</details>

<br />

<details>
<summary>token.java</summary>

```java
/**
 * Scans one operator of one or two characters.
 *
 * <p>State 0 reads the first character; states 1-12 each remember which first
 * character was seen and decide, from the second character, whether the operator
 * is a two-character one. When the second character does not belong to the
 * operator, {@code it.putBack()} is called before the single-character token is
 * returned.
 *
 * @param it character stream positioned at the first character of the operator
 * @return an OPERATOR token holding the operator text
 * @throws LexicalException if the input ends before an operator is completed
 */
public static Token makeOp(PeekIterator<Character> it) throws LexicalException {
    int state = 0;

    while (it.hasNext()) {
        char lookahead = it.next();

        switch (state) {
            case 0:
                switch (lookahead) {
                    case '+':
                        state = 1;
                        break;
                    case '-':
                        state = 2;
                        break;
                    case '*':
                        state = 3;
                        break;
                    case '/':
                        state = 4;
                        break;
                    case '>':
                        state = 5;
                        break;
                    case '<':
                        state = 6;
                        break;
                    case '=':
                        state = 7;
                        break;
                    case '!':
                        state = 8;
                        break;
                    case '&':
                        state = 9;
                        break;
                    case '|':
                        state = 10;
                        break;
                    case '^':
                        state = 11;
                        break;
                    case '%':
                        state = 12;
                        break;
                    case ',':
                        return new Token(TokenType.OPERATOR, ",");
                    case ';':
                        return new Token(TokenType.OPERATOR, ";");
                } // end of inner switch
                break;
            case 1: // seen '+'
                if (lookahead == '+') {
                    return new Token(TokenType.OPERATOR, "++");
                } else if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "+=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "+");
                }
            case 2: // seen '-'
                if (lookahead == '-') {
                    return new Token(TokenType.OPERATOR, "--");
                } else if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "-=");
                } else {
                    it.putBack();
                    // A lone '-' must yield "-" (the earlier version returned "+").
                    return new Token(TokenType.OPERATOR, "-");
                }
            case 3: // seen '*'
                if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "*=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "*");
                }
            case 4: // seen '/'
                if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "/=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "/");
                }
            case 5: // seen '>'
                if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, ">=");
                } else if (lookahead == '>') {
                    return new Token(TokenType.OPERATOR, ">>");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, ">");
                }
            case 6: // seen '<'
                if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "<=");
                } else if (lookahead == '<') {
                    return new Token(TokenType.OPERATOR, "<<");
                } else {
                    it.putBack();
                    // A lone '<' must yield "<" (the earlier version returned ">").
                    return new Token(TokenType.OPERATOR, "<");
                }
            case 7: // seen '='
                if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "==");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "=");
                }
            case 8: // seen '!'
                if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "!=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "!");
                }
            case 9: // seen '&'
                if (lookahead == '&') {
                    return new Token(TokenType.OPERATOR, "&&");
                } else if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "&=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "&");
                }
            case 10: // seen '|'
                if (lookahead == '|') {
                    return new Token(TokenType.OPERATOR, "||");
                } else if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "|=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "|");
                }
            case 11: // seen '^'
                if (lookahead == '^') {
                    return new Token(TokenType.OPERATOR, "^^");
                } else if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "^=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "^");
                }
            case 12: // seen '%'
                if (lookahead == '=') {
                    return new Token(TokenType.OPERATOR, "%=");
                } else {
                    it.putBack();
                    return new Token(TokenType.OPERATOR, "%");
                }
        }
    } // end while

    throw new LexicalException("Unexpected error");
}
```

</details>

<br />

### 提取数字

**状态机描述:**

![](https://img.kancloud.cn/c4/b0/c4b01b6bd40ed6b2b59f6cab96007569_800x268.png)

简单说明:

- 查看状态 "0" -> "1" 这条转移:若用户一直输入 "0",并且接下来输入的字符既不是 "1-9" 也不是 ".",则说明这个整数为 0

<br/>

<details>
<summary>token.java</summary>

```java
/**
 * Scans an integer or floating-point literal.
 *
 * <p>States: 0 = start, 1 = only zeros so far, 2 = integer digits,
 * 3 = sign seen, 4 = integer part followed by '.', 5 = '.' with no integer
 * digits before it, 20 = fraction digits.
 *
 * <p>Each character is peeked first and consumed only after it has been
 * accepted as part of the number, so the character that terminates the literal
 * stays in the stream for the next token.
 *
 * @param it character stream positioned at the first character of the number
 * @return an INTEGER or FLOAT token whose value is the text collected so far
 * @throws LexicalException on a malformed number (e.g. a sign or a leading '.'
 *         not followed by a digit, or a second '.'), or if the input ends
 *         before a terminating character is seen
 */
public static Token makeNumber(PeekIterator<Character> it) throws LexicalException {
    StringBuilder s = new StringBuilder();
    int state = 0;

    while (it.hasNext()) {
        // Peek instead of consuming: a terminating character must not be swallowed.
        char lookahead = it.peek();

        switch (state) {
            case 0:
                if (lookahead == '0') {
                    state = 1;
                } else if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '+' || lookahead == '-') {
                    state = 3;
                } else if (lookahead == '.') {
                    state = 5;
                }
                break;
            case 1:
                if (lookahead == '0') {
                    state = 1;
                } else if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '.') {
                    state = 4;
                } else {
                    return new Token(TokenType.INTEGER, s.toString());
                }
                break;
            case 2:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '.') {
                    state = 4;
                } else {
                    return new Token(TokenType.INTEGER, s.toString());
                }
                break;
            case 3:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '.') {
                    state = 5;
                } else {
                    throw new LexicalException(lookahead);
                }
                break;
            case 4:
                if (lookahead == '.') {
                    throw new LexicalException(lookahead);
                } else if (AlphabetHelper.isNumber(lookahead)) {
                    state = 20;
                } else {
                    return new Token(TokenType.FLOAT, s.toString());
                }
                break;
            case 5:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 20;
                } else {
                    throw new LexicalException(lookahead);
                }
                break;
            case 20:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 20;
                } else if (lookahead == '.') {
                    throw new LexicalException(lookahead);
                } else {
                    return new Token(TokenType.FLOAT, s.toString());
                }
                break;
        } // end switch

        // The character belongs to the number: consume it and record it.
        it.next();
        s.append(lookahead);
    } // end while

    throw new LexicalException("Unexpected err ");
}
```

</details>

<br />