-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTokenization.java
More file actions
90 lines (80 loc) · 4.16 KB
/
Tokenization.java
File metadata and controls
90 lines (80 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*******************************************************************
* Tokenization Class *
* *
* PROGRAMMER: Emily Culp*
* COURSE: CS340 - Programming Language Design*
* DATE: 11/12/2024 *
* REQUIREMENT: Tokenization for the interpreter *
* *
* DESCRIPTION: *
* This class is responsible for tokenizing input commands into an array of tokens. *
* It breaks down the command string based on predefined delimiters and returns *
* an array of strings representing individual tokens. This functionality is essential *
* for processing commands in the interpreter. *
* *
* COPYRIGHT: This code is copyright (C) 2024 Emily Culp and Dean Zeller. *
* *
* CREDITS: This code was written with the help of ChatGPT. *
* *
*******************************************************************/
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Tokenization {
    // Token grammar. Alternatives are tried left to right at each position, so
    // quoted strings win over a lone quote, doubles over integers, and the
    // two-character operators over their single-character prefixes.
    private static final String TOKEN_REGEX = "\"[^\"]*\"|\\d+\\.\\d+|\\d+|\\w+|>=|<=|==|!=|\\+\\+|--|[+\\-*/=(){}^<>.,?!:\"'\\[\\]]|;";

    // Compiled once; Pattern instances are immutable and safe to share.
    private static final Pattern TOKEN_PATTERN = Pattern.compile(TOKEN_REGEX);

    // Patterns used by preprocessInput, applied in this order.
    private static final Pattern NEWLINE_BEFORE_BRACE = Pattern.compile("\\n\\s*\\{");
    private static final Pattern BRACE_BEFORE_NEWLINE = Pattern.compile("\\}\\s*\\n");
    private static final Pattern LINE_BREAK = Pattern.compile("\\s*\\n\\s*");

    // Patterns used by getTokenType; each must match the whole token.
    private static final Pattern DOUBLE_LITERAL = Pattern.compile("\\d+\\.\\d+");
    private static final Pattern INTEGER_LITERAL = Pattern.compile("\\d+");
    private static final Pattern STRING_LITERAL = Pattern.compile("\"[^\"]*\"");
    private static final Pattern OPERATOR =
            Pattern.compile(">=|<=|==|!=|\\+\\+|--|[+\\-*/=(){}^<>.,?!:\"'\\[\\]]|;");
    private static final Pattern KEYWORD =
            Pattern.compile("integer|input|print|boolean|double|string");

    /**********************************************************
    * METHOD: tokenize(String command)                        *
    * DESCRIPTION: Splits a command string into tokens using  *
    *              TOKEN_REGEX. Recognized tokens are quoted  *
    *              strings, doubles, integers, words, the     *
    *              operators >= <= == != ++ --, single        *
    *              punctuation characters, and ';'.           *
    *              Whitespace and any character that no       *
    *              alternative matches are skipped silently.  *
    *              The token list is also printed to          *
    *              standard output as "Tokens: [...]".        *
    * PARAMETERS: String command - the command string to      *
    *              tokenize; null is treated as empty input   *
    * RETURN VALUE: String[] - the tokens in source order;    *
    *              an empty array when there are none         *
    **********************************************************/
    public static String[] tokenize(String command) {
        List<String> tokens = new ArrayList<>();
        if (command != null) {
            Matcher matcher = TOKEN_PATTERN.matcher(command);
            // No alternative in TOKEN_REGEX can match whitespace or the empty
            // string, so every match is a usable token and needs no trimming.
            while (matcher.find()) {
                tokens.add(matcher.group());
            }
        }
        System.out.println("Tokens: " + tokens);
        return tokens.toArray(new String[0]);
    }

    /**********************************************************
    * METHOD: preprocessInput(String input)                   *
    * DESCRIPTION: Collapses multi-line input onto one line   *
    *              so it can be tokenized as a single         *
    *              command. A newline before '{' becomes      *
    *              " {", a newline after '}' becomes "} ",    *
    *              and every remaining newline, with the      *
    *              whitespace around it, becomes one space.   *
    * PARAMETERS: String input - raw input string to process; *
    *              null is treated as empty input             *
    * RETURN VALUE: String - the input with no newlines left  *
    **********************************************************/
    public static String preprocessInput(String input) {
        if (input == null) {
            return "";
        }
        String result = NEWLINE_BEFORE_BRACE.matcher(input).replaceAll(" {");
        result = BRACE_BEFORE_NEWLINE.matcher(result).replaceAll("} ");
        return LINE_BREAK.matcher(result).replaceAll(" ");
    }

    /**********************************************************
    * METHOD: getTokenType(String token)                      *
    * DESCRIPTION: Classifies one token as produced by        *
    *              tokenize. Checks run in this order:        *
    *              double, integer, boolean, quoted string,   *
    *              operator/punctuation, keyword; anything    *
    *              else is reported as an identifier.         *
    * PARAMETERS: String token - the token to classify        *
    * RETURN VALUE: String - one of "Literal (Double)",       *
    *              "Literal (Integer)", "BooleanLiteral",     *
    *              "Literal (String)", "Operator",            *
    *              "Keyword", "Identifier"                    *
    * EXCEPTIONS: NullPointerException if token is null       *
    **********************************************************/
    private static String getTokenType(String token) {
        if (DOUBLE_LITERAL.matcher(token).matches()) {
            return "Literal (Double)";
        }
        if (INTEGER_LITERAL.matcher(token).matches()) {
            return "Literal (Integer)";
        }
        if (token.equals("true") || token.equals("false")) {
            return "BooleanLiteral";
        }
        // Checked before OPERATOR so a full "..." literal is not confused
        // with the lone quote character that OPERATOR also accepts.
        if (STRING_LITERAL.matcher(token).matches()) {
            return "Literal (String)";
        }
        // Covers every operator the tokenizer can emit, including the
        // two-character ones and ';'.
        if (OPERATOR.matcher(token).matches()) {
            return "Operator";
        }
        if (KEYWORD.matcher(token).matches()) {
            return "Keyword";
        }
        return "Identifier";
    }
}