// Corpus.java
package edu.odu.cs.cs350;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
 * Represents a collection (corpus) of {@link Document} objects.
 * <p>
 * The corpus keeps its documents in the order they were added and offers
 * corpus-level statistics, such as the number of documents that contain a
 * given word.
 * </p>
 */
public class Corpus {

    /** The documents contained in this corpus, in insertion order. */
    private final List<Document> documents;

    /**
     * Constructs an empty Corpus.
     */
    public Corpus() {
        documents = new ArrayList<>();
    }

    /**
     * Adds a {@link Document} to this corpus.
     *
     * @param doc the document to add; must not be {@code null}
     * @throws NullPointerException if {@code doc} is {@code null}
     */
    public void addDocument(Document doc) {
        // Fail fast: a stored null would otherwise only surface later as a
        // NullPointerException inside getDocumentCountContaining, far away
        // from the call that actually introduced it.
        documents.add(Objects.requireNonNull(doc, "doc must not be null"));
    }

    /**
     * Returns the list of all {@link Document} objects in this corpus.
     * <p>
     * The returned list is the corpus's own backing list, not a copy:
     * changes made through it are reflected in this corpus.
     * </p>
     *
     * @return the list of documents, in insertion order
     */
    public List<Document> getDocuments() {
        return documents;
    }

    /**
     * Returns the total number of documents in this corpus.
     *
     * @return the number of documents
     */
    public int getTotalDocuments() {
        return documents.size();
    }

    /**
     * Counts how many documents in the corpus contain a given word.
     * <p>
     * This is useful for computing the inverse document frequency (IDF)
     * part of the TF-IDF calculation. A document counts at most once, no
     * matter how often the word occurs in it. The lookup is delegated to
     * {@code containsKey} on the value returned by
     * {@link Document#getWords()}, so matching follows that collection's
     * key semantics.
     * </p>
     *
     * @param word the word to search for
     * @return the number of documents containing the given word;
     *         {@code 0} if the corpus is empty
     */
    public int getDocumentCountContaining(String word) {
        int count = 0;
        for (Document doc : documents) {
            if (doc.getWords().containsKey(word)) {
                count++;
            }
        }
        return count;
    }
}