//---------------------------------------------------------------------------
// Lexer and recursive-descent (LL(1)) syntax checker for a ';'-separated
// list of http://, ftp://, telnet:// and mailto: references.
//
//#pragma hdrstop

//---------------------------------------------------------------------------

#include <iostream>
#include <stdio.h>
#include <stdlib.h>

// States of the hand-written lexer automaton.  The HTTP_*, FTP_*, TELNET_*
// and MAILTO_* states track how much of the corresponding scheme keyword
// (and its ':' / "//" suffix) has been read so far.
enum LexerState {
    _START,
    HTTP_H, HTTP_T1, HTTP_T2, HTTP_P, HTTP_COLON, HTTP_SLASH1, HTTP_SLASH2,
    FTP_F, FTP_T, FTP_P, FTP_COLON, FTP_SLASH1, FTP_SLASH2,
    TELNET_T1, TELNET_E1, TELNET_L, TELNET_N, TELNET_E2, TELNET_T2,
    TELNET_COLON, TELNET_SLASH1, TELNET_SLASH2,
    MAILTO_M, MAILTO_A, MAILTO_I, MAILTO_L, MAILTO_T, MAILTO_O, MAILTO_COLON,
    DIGIT,
    ALPHA,
    _STOP
};

// Tokens produced by lexer() and consumed by the parser.  _END terminates
// every token sequence.
enum Token {
    DIGITS, ALPHAS,
    HTTP, FTP, TELNET, MAILTO,
    DOT, COLON, SEMICOLON, QUESTIONMARK, AT, SLASH, PLUS,
    _END
};

// Reports a lexical error for character c at index pos and terminates the
// program with exit status 1.
void error(char c, int pos)
{
    printf("Unexpected '%c' at position: %d", c, pos);
    exit(1);
}

// True for ASCII letters a-z and A-Z.
bool isAlpha(char c)
{
    return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
}

// True for ASCII digits 0-9.
bool isDigit(char c)
{
    return (c >= '0') && (c <= '9');
}

// One step of scheme-keyword recognition.
//
// If c is the expected next keyword character the automaton moves to `next`.
// Otherwise the characters read so far are just the beginning of an ordinary
// word: a letter continues that word in the ALPHA state, anything else (digit,
// punctuation, end of input) ends the word, so ALPHAS is emitted and c is
// handed back to _START.  In both fallback cases *i is decremented so that
// the caller's unconditional i++ re-reads c.
static LexerState keywordStep(char c, char expected, LexerState next,
                              Token *units, int *j, int *i)
{
    if (c == expected) {
        return next;
    }
    (*i)--;
    if (isAlpha(c)) {
        return ALPHA;
    }
    units[(*j)++] = ALPHAS;
    return _START;
}

// After "<scheme>:" only '/' is acceptable; anything else is a lexical error.
static LexerState slashStep(char c, int pos, LexerState next)
{
    if (c != '/') {
        error(c, pos);  // terminates the program
    }
    return next;
}

// Splits the NUL-terminated string `stream` into tokens written to `units`,
// terminated by _END.
//
// Every token consumes at least one input character, so `units` must have
// room for strlen(stream) + 1 entries.  An unexpected character terminates
// the program through error().
//
// Note: once "http:", "ftp:" or "telnet:" has been read the lexer commits to
// the scheme and requires "//" to follow.
void lexer(char *stream, Token *units)
{
    int i = 0;
    int j = 0;
    LexerState state = _START;

    while (state != _STOP) {
        char c = stream[i];
        //cout << "position: " << i << " state: " << state << " input: " << c << " code: " << (int)c << endl;
        switch (state) {
        case _START:
            switch (c) {
            case '\0': state = _STOP; i--; break;
            case 'h': state = HTTP_H; break;
            case 'f': state = FTP_F; break;
            case 't': state = TELNET_T1; break;
            case 'm': state = MAILTO_M; break;
            case '.': units[j++] = DOT; break;
            case ':': units[j++] = COLON; break;
            case ';': units[j++] = SEMICOLON; break;
            case '?': units[j++] = QUESTIONMARK; break;
            case '@': units[j++] = AT; break;
            case '/': units[j++] = SLASH; break;
            case '+': units[j++] = PLUS; break;
            default:
                if (isAlpha(c)) {
                    state = ALPHA;
                } else if (isDigit(c)) {
                    state = DIGIT;
                } else {
                    error(c, i);
                }
                break;
            }
            break;

        case DIGIT:
            if (!isDigit(c)) {
                state = _START;
                units[j++] = DIGITS;
                i--;  // re-read c from _START
            }
            break;

        case ALPHA:
            if (!isAlpha(c)) {
                state = _START;
                units[j++] = ALPHAS;
                i--;  // re-read c from _START
            }
            break;

        // HTTP
        case HTTP_H:  state = keywordStep(c, 't', HTTP_T1, units, &j, &i); break;
        case HTTP_T1: state = keywordStep(c, 't', HTTP_T2, units, &j, &i); break;
        case HTTP_T2: state = keywordStep(c, 'p', HTTP_P, units, &j, &i); break;
        case HTTP_P:  state = keywordStep(c, ':', HTTP_COLON, units, &j, &i); break;
        case HTTP_COLON:  state = slashStep(c, i, HTTP_SLASH1); break;
        case HTTP_SLASH1: state = slashStep(c, i, HTTP_SLASH2); break;
        case HTTP_SLASH2: units[j++] = HTTP; state = _START; i--; break;

        // FTP
        case FTP_F: state = keywordStep(c, 't', FTP_T, units, &j, &i); break;
        case FTP_T: state = keywordStep(c, 'p', FTP_P, units, &j, &i); break;
        case FTP_P: state = keywordStep(c, ':', FTP_COLON, units, &j, &i); break;
        case FTP_COLON:  state = slashStep(c, i, FTP_SLASH1); break;
        case FTP_SLASH1: state = slashStep(c, i, FTP_SLASH2); break;
        case FTP_SLASH2: units[j++] = FTP; state = _START; i--; break;

        // TELNET
        case TELNET_T1: state = keywordStep(c, 'e', TELNET_E1, units, &j, &i); break;
        case TELNET_E1: state = keywordStep(c, 'l', TELNET_L, units, &j, &i); break;
        case TELNET_L:  state = keywordStep(c, 'n', TELNET_N, units, &j, &i); break;
        case TELNET_N:  state = keywordStep(c, 'e', TELNET_E2, units, &j, &i); break;
        case TELNET_E2: state = keywordStep(c, 't', TELNET_T2, units, &j, &i); break;
        case TELNET_T2: state = keywordStep(c, ':', TELNET_COLON, units, &j, &i); break;
        case TELNET_COLON:  state = slashStep(c, i, TELNET_SLASH1); break;
        case TELNET_SLASH1: state = slashStep(c, i, TELNET_SLASH2); break;
        case TELNET_SLASH2: units[j++] = TELNET; state = _START; i--; break;

        // MAILTO
        case MAILTO_M: state = keywordStep(c, 'a', MAILTO_A, units, &j, &i); break;
        case MAILTO_A: state = keywordStep(c, 'i', MAILTO_I, units, &j, &i); break;
        case MAILTO_I: state = keywordStep(c, 'l', MAILTO_L, units, &j, &i); break;
        case MAILTO_L: state = keywordStep(c, 't', MAILTO_T, units, &j, &i); break;
        case MAILTO_T: state = keywordStep(c, 'o', MAILTO_O, units, &j, &i); break;
        case MAILTO_O: state = keywordStep(c, ':', MAILTO_COLON, units, &j, &i); break;
        case MAILTO_COLON: units[j++] = MAILTO; state = _START; i--; break;

        default:
            break;
        }
        i++;
    }

    units[j++] = _END;
}

// One function per grammar non-terminal.  The numbers / letters in the
// comments inside each switch are the grammar rule labels used by the
// original author's parse table.
void ll_W();
void ll_Q();
void ll_V();
void ll_H();
void ll_alpha();
void ll_beta();
void ll_O();
void ll_gama();
void ll_N();
void ll_delta();
void ll_X();
void ll_Y();
void ll_I();
void ll_C();
void ll_pi();
void ll_G();
void ll_S();
void ll_omega();
void ll_F();
void ll_L();
void ll_fi();
void ll_mi();
void ll_xi();
void ll_fork();
void ll_ro();
void ll_T();
void ll_M();
void ll_A();
void ll_D();

// Reports a syntax error.  Parsing continues afterwards; the current token
// is not consumed.
void errorSynt()
{
    printf("Syntax Error -> Unexpected Token\n");
}

// Cursor into the token sequence being parsed; always points at the current
// (not yet consumed) token.
Token *tokens;

// Consumes the current token if it equals `token`, otherwise reports a
// syntax error and leaves the cursor where it is.
void checkToken(int token)
{
    if (token == *tokens) {
        tokens++;
    } else {
        errorSynt();
    }
}

// Parses the _END-terminated token sequence t starting from non-terminal W,
// then waits for one character on stdin.
void syntax(Token *t)
{
    tokens = t;
    ll_W();
    getchar();
}

// W -> V Q
void ll_W()
{
    switch (*tokens) {
    case HTTP:
    case FTP:
    case TELNET:
    case MAILTO: // 1
        ll_V();
        ll_Q();
        break;
    default:
        errorSynt();
        break;
    }
}

// Q -> ';' V Q | (empty)
void ll_Q()
{
    switch (*tokens) {
    case SEMICOLON: // 2
        checkToken(SEMICOLON);
        ll_V();
        ll_Q();
        break;
    case _END: // 3
        break;
    default:
        errorSynt();
        break;
    }
}

// V -> H | F | T | M
void ll_V()
{
    switch (*tokens) {
    case HTTP: // 4
        ll_H();
        break;
    case FTP: // 5
        ll_F();
        break;
    case TELNET: // 6
        ll_T();
        break;
    case MAILTO: // 7
        ll_M();
        break;
    default:
        errorSynt();
        break;
    }
}

// H -> HTTP O alpha beta
void ll_H()
{
    switch (*tokens) {
    case HTTP: // 8
        checkToken(HTTP);
        ll_O();
        ll_alpha();
        ll_beta();
        break;
    default:
        errorSynt();
        break;
    }
}

// alpha -> '/' C | (empty)
void ll_alpha()
{
    switch (*tokens) {
    case SEMICOLON:
    case QUESTIONMARK: // 10
        break;
    case SLASH: // 9
        checkToken(SLASH);
        ll_C();
        break;
    case _END: // 10
        break;
    default:
        errorSynt();
        break;
    }
}

// beta -> '?' S | (empty)
void ll_beta()
{
    switch (*tokens) {
    case SEMICOLON: // 12
        break;
    case QUESTIONMARK:
    case AT: // 11
        // NOTE(review): with AT as the current token checkToken() below
        // reports a syntax error; kept as in the original parse table.
        checkToken(QUESTIONMARK);
        ll_S();
        break;
    case _END: // 12
        break;
    default:
        errorSynt();
        break;
    }
}

// O -> N gama
void ll_O()
{
    switch (*tokens) {
    case COLON:
    case ALPHAS:
    case DIGITS:
    case _END: // 13
        ll_N();
        ll_gama();
        break;
    default:
        errorSynt();
        break;
    }
}

// gama -> ':' I | (empty)
void ll_gama()
{
    switch (*tokens) {
    case COLON: // 14
        checkToken(COLON);
        ll_I();
        break;
    case SEMICOLON:
    case QUESTIONMARK:
    case SLASH:
    case _END: // 15
        break;
    default:
        errorSynt();
        break;
    }
}

// N -> X delta
void ll_N()
{
    switch (*tokens) {
    case DOT:
    case ALPHAS:
    case DIGITS:
    case _END: // 16
        ll_X();
        ll_delta();
        break;
    default:
        errorSynt();
        break;
    }
}

// delta -> '.' X delta | (empty)
void ll_delta()
{
    switch (*tokens) {
    case DOT: // 17
        checkToken(DOT);
        ll_X();
        ll_delta();
        break;
    case COLON:
    case SEMICOLON:
    case QUESTIONMARK:
    case SLASH:
    case _END: // 18
        break;
    default:
        errorSynt();
        break;
    }
}

// X -> Y X | (empty)
void ll_X()
{
    switch (*tokens) {
    case DOT:
    case COLON:
    case SEMICOLON:
    case QUESTIONMARK:
    case AT:
    case SLASH:
    case PLUS:
    case _END: // 20
        break;
    case ALPHAS:
    case DIGITS: // 19
        ll_Y();
        ll_X();
        break;
    default:
        errorSynt();
        break;
    }
}

// Y -> A | D
void ll_Y()
{
    switch (*tokens) {
    case ALPHAS: // 21
        ll_A();
        break;
    case DIGITS: // 22
        ll_D();
        break;
    default:
        errorSynt();
        break;
    }
}

// I -> D I | (empty)
void ll_I()
{
    switch (*tokens) {
    case SEMICOLON:
    case QUESTIONMARK:
    case SLASH: // 24
        break;
    case DIGITS: // 23
        ll_D();
        ll_I();
        break;
    case _END: // 24
        break;
    default:
        errorSynt();
        break;
    }
}

// C -> G pi
void ll_C()
{
    switch (*tokens) {
    case SLASH:
    case ALPHAS:
    case DIGITS:
    case _END: // 25 (marked "!!!" by the original author)
        ll_G();
        ll_pi();
        break;
    default:
        errorSynt();
        break;
    }
}

// pi -> '/' G pi | (empty)
void ll_pi()
{
    switch (*tokens) {
    case SEMICOLON:
    case QUESTIONMARK: // 27
        break;
    case SLASH: // 26
        checkToken(SLASH);
        ll_G();
        ll_pi();
        break;
    case _END: // 27
        break;
    default:
        errorSynt();
        break;
    }
}

// G -> Y G | (empty)
void ll_G()
{
    switch (*tokens) {
    case SEMICOLON:
    case QUESTIONMARK:
    case SLASH: // 29
        break;
    case ALPHAS:
    case DIGITS: // 28
        ll_Y();
        ll_G();
        break;
    case _END: // 29
        break;
    default:
        errorSynt();
        break;
    }
}

// S -> X omega
void ll_S()
{
    switch (*tokens) {
    case PLUS:
    case ALPHAS:
    case DIGITS:
    case _END: // 30
        ll_X();
        ll_omega();
        break;
    default:
        errorSynt();
        break;
    }
}

// omega -> '+' X omega | (empty)
void ll_omega()
{
    switch (*tokens) {
    case SEMICOLON: // 32
        // ';' ends this reference, so omega derives the empty string here.
        // Routing ';' into the '+' rule never consumed the token and
        // recursed without end.
        break;
    case PLUS: // 31
        checkToken(PLUS);
        ll_X();
        ll_omega();
        break;
    case _END: // 32
        break;
    default:
        errorSynt();
        break;
    }
}

// F -> FTP L '/' C
void ll_F()
{
    switch (*tokens) {
    case FTP: // 33
        checkToken(FTP);
        ll_L();
        checkToken(SLASH);
        ll_C();
        break;
    default:
        errorSynt();
        break;
    }
}

// L -> X mi
void ll_L()
{
    switch (*tokens) {
    case DOT:
    case COLON:
    case SEMICOLON:
    case AT:
    case SLASH:
    case ALPHAS:
    case DIGITS: // (A)
        ll_X();
        ll_mi();
        break;
    default:
        errorSynt();
        break;
    }
}

// fi -> '.' N xi | ':' fork | '@' O
void ll_fi()
{
    switch (*tokens) {
    case DOT: // (D)
        checkToken(DOT);
        ll_N();
        ll_xi();
        break;
    case COLON: // (E)
        checkToken(COLON);
        ll_fork();
        break;
    case AT: // (F)
        checkToken(AT);
        ll_O();
        break;
    default:
        errorSynt();
        break;
    }
}

// mi -> fi | (empty)
void ll_mi()
{
    switch (*tokens) {
    case DOT:
    case COLON: // (B)
        ll_fi();
        break;
    case SEMICOLON: // (C)
        break;
    case AT: // (B)
        ll_fi();
        break;
    case SLASH:
    case _END: // (C)
        break;
    default:
        errorSynt();
        break;
    }
}

// xi -> ':' I | (empty)
void ll_xi()
{
    switch (*tokens) {
    case COLON: // (G)
        checkToken(COLON);
        ll_I();
        break;
    case SLASH:
    case _END: // (H)
        break;
    default:
        errorSynt();
        break;
    }
}

// fork -> A G '@' O | I ro
void ll_fork()
{
    switch (*tokens) {
    case ALPHAS: // (I)
        ll_A();
        ll_G();
        checkToken(AT);
        ll_O();
        break;
    case DIGITS: // (J)
        ll_I();
        ll_ro();
        break;
    default:
        errorSynt();
        break;
    }
}

// ro -> '@' O | A G '@' O | (empty)
void ll_ro()
{
    switch (*tokens) {
    case AT: // (L)
        checkToken(AT);
        ll_O();
        break;
    case SLASH: // (M)
        break;
    case ALPHAS: // (K)
        ll_A();
        ll_G();
        checkToken(AT);
        ll_O();
        break;
    case _END: // (M)
        break;
    default:
        errorSynt();
        break;
    }
}

// T -> TELNET L
void ll_T()
{
    switch (*tokens) {
    case TELNET: // 39
        checkToken(TELNET);
        ll_L();
        break;
    default:
        errorSynt();
        break;
    }
}

// M -> MAILTO X '@' N
void ll_M()
{
    switch (*tokens) {
    case MAILTO: // 40
        checkToken(MAILTO);
        ll_X();
        checkToken(AT);
        ll_N();
        break;
    default:
        errorSynt();
        break;
    }
}

// A -> ALPHAS
void ll_A()
{
    switch (*tokens) {
    case ALPHAS: // 41
        checkToken(ALPHAS);
        break;
    default:
        errorSynt();
        break;
    }
}

// D -> DIGITS
void ll_D()
{
    switch (*tokens) {
    case DIGITS: // 42
        checkToken(DIGITS);
        break;
    default:
        errorSynt();
        break;
    }
}

//#pragma argsused
// Reads one whitespace-delimited word from stdin, prints the numeric value of
// every token the lexer produces (one per line), then runs the syntax check.
int main(int argc, char *argv[])
{
    enum { INPUT_MAX = 100 };

    (void)argc;
    (void)argv;

    char in[INPUT_MAX] = "";
    // At most INPUT_MAX - 1 characters are read and each token consumes at
    // least one character, so INPUT_MAX entries always fit the tokens plus
    // the trailing _END.
    Token lexicalUnits[INPUT_MAX];

    // width() caps the extraction at INPUT_MAX - 1 characters plus the
    // terminating NUL, so an over-long word can no longer overflow `in`.
    std::cin.width(INPUT_MAX);
    if (!(std::cin >> in)) {
        in[0] = '\0';  // no input: lex an empty string
    }

    lexer(in, lexicalUnits);
    for (int i = 0; lexicalUnits[i] != _END; i++) {
        printf("%d\n", lexicalUnits[i]);
    }

    syntax(lexicalUnits);
    return 0;
}
//---------------------------------------------------------------------------