// txtFileProcessor.java
package edu.odu.cs.cs350;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;
/**
 * Converts a plain text (.txt) file into a {@link Document} of normalized words.
 *
 * <p>A token is a run of ASCII letters, digits, apostrophes and hyphens. Each token has
 * its leading and trailing apostrophes/hyphens removed and is lower-cased; tokens that end
 * up empty or that are common English stop words are discarded.
 */
public class txtFileProcessor {

    /**
     * Common English stop words (all lower case) that are skipped during processing.
     */
    private static final Set<String> STOP_WORDS = Set.of(
        "a","about","above","after","again","against","all","am","an","and","any","are","aren't",
        "as","at","be","because","been","before","being","below","between","both","but","by",
        "could","couldn't","did","didn't","do","does","doesn't","doing","don't","down","during",
        "each","few","for","from","further","had","hadn't","has","hasn't","have","haven't","having",
        "he","he'd","he'll","he's","her","here","here's","hers","herself","him","himself","his",
        "how","how's","i","i'd","i'll","i'm","i've","if","in","into","is","isn't","it","it's","its",
        "itself","let's","me","more","most","mustn't","my","myself","no","nor","not","of","off","on",
        "once","only","or","other","ought","our","ours","ourselves","out","over","own","same","shan't",
        "she","she'd","she'll","she's","should","shouldn't","so","some","such","than","that","that's",
        "the","their","theirs","them","themselves","then","there","there's","these","they","they'd",
        "they'll","they're","they've","this","those","through","to","too","under","until","up","very",
        "was","wasn't","we","we'd","we'll","we're","we've","were","weren't","what","what's","when",
        "when's","where","where's","which","while","who","who's","whom","why","why's","with","won't",
        "would","wouldn't","you","you'd","you'll","you're","you've","your","yours","yourself",
        "yourselves"
    );

    /**
     * Regular expression matching the separators between tokens: any run of characters
     * other than ASCII letters, digits, apostrophes and hyphens.
     */
    private static final String TOKEN_DELIMITER = "[^A-Za-z0-9'-]+";

    /**
     * Charset used to decode the file, fixed so results do not depend on the platform default.
     */
    private static final String CHARSET_NAME = "UTF-8";

    /**
     * The path to the text file read in to the processor.
     */
    private final String filePath;

    /**
     * Constructor for txtFileProcessor.
     *
     * @param filePath the path to the text file to be processed
     */
    public txtFileProcessor(String filePath) {
        this.filePath = filePath;
    }

    /**
     * Returns the file path associated with this text file processor.
     *
     * @return the file path, exactly as passed to the constructor
     */
    public String getFilePath() {
        return filePath;
    }

    /**
     * Reads the text file and converts it into a Document object named after the file,
     * adding every normalized token that is not a stop word.
     *
     * <p>Failures are reported on {@code System.err} rather than thrown.
     *
     * @return a Document object representing the file's contents, or null if the path is
     *         null, does not denote an existing regular file, cannot be opened, or an
     *         I/O error occurs while reading
     */
    public Document processFile() {
        // A null path would make new File(...) throw; treat it as any other invalid file.
        if (filePath == null) {
            System.err.println("Invalid file: " + filePath);
            return null;
        }

        File inFile = new File(filePath);
        // isFile() is false for missing paths too, so no separate exists() check is needed.
        if (!inFile.isFile()) {
            System.err.println("Invalid file: " + filePath);
            return null;
        }

        Document doc = new Document(inFile.getName());
        try (Scanner scanner = new Scanner(inFile, CHARSET_NAME)) {
            scanner.useDelimiter(TOKEN_DELIMITER);
            while (scanner.hasNext()) {
                String word = normalize(scanner.next());
                if (!word.isEmpty() && !STOP_WORDS.contains(word)) {
                    doc.addWord(word);
                }
            }
            // Scanner swallows read errors and simply stops; surface them instead of
            // returning a silently truncated document.
            if (scanner.ioException() != null) {
                System.err.println("Error reading file: " + filePath);
                return null;
            }
        } catch (FileNotFoundException e) {
            System.err.println("Could not open file: " + filePath);
            return null;
        }
        return doc;
    }

    /**
     * Normalizes a raw token: strips leading and trailing apostrophes/hyphens (so quoted
     * words and stray dashes do not become distinct words) and lower-cases the rest.
     * Lower-casing uses {@link Locale#ROOT} so the result matches the stop-word list
     * regardless of the default locale.
     *
     * @param token a raw token produced by the scanner
     * @return the normalized word, possibly empty
     */
    private static String normalize(String token) {
        int start = 0;
        int end = token.length();
        while (start < end && isEdgePunctuation(token.charAt(start))) {
            start++;
        }
        while (end > start && isEdgePunctuation(token.charAt(end - 1))) {
            end--;
        }
        return token.substring(start, end).toLowerCase(Locale.ROOT);
    }

    /**
     * Tells whether a character is punctuation that is allowed inside a word but is
     * removed from its edges.
     *
     * @param c the character to test
     * @return true for an apostrophe or a hyphen
     */
    private static boolean isEdgePunctuation(char c) {
        return c == '\'' || c == '-';
    }
}