/* Use, modification, and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1.0.txt or copy at www.boost.org/LICENSE_1.0.txt) */ // Word class. // This class stores information about a specific word on a webpage. // It contains a string of the word, a vector of positions on the page // that the word occurs in, and a total wordcount. import java.util.*; /** * This class stores information about a specific word on a webpage. * @author Aaron Miller, Mike Ottum * @version 1.5 */ public class Word { /** A hash of HTML tags with the associated word position as the key. * Contains the various positions that the word appears on the page. */ protected HashMap tags; /** The actual word associated with the object. */ protected String word; /** The number of times that the word appears on the page. */ protected int count; /** * Initializes the data structures for the word (word_name) and * adds the position (pos). * @param word_name the string of the word itself. * @param pos the position of the first instance of the word on the webpage */ public Word(String word_name, int pos) { //positions = new Vector(); tags = new HashMap(); Integer i = new Integer(pos); //positions.add(i); word = word_name; count = 0; } /** * Associates HTML tags with this word at the given position. * Accepted tags are <b>, <i>, <u>, <h1>, * <h2>, <h3>, <h4>, <h5>, <h6>, * <img>, <title>, and <meta> * assign string value to each tag * string order: 0 b * 1 i * 2 u * 3 h1 * 4 h2 * 5 h3 * 6 h4 * 7 h5 * 8 h6 * 9 img * 10 t * 11 meta * ex: 100010000000 = bold and h2 * @param tagString The bitstring of the tag to be applied. * @param pos The position of the word that the given tag applies to. */ public void addTags(String tagString, int pos) { Integer i = new Integer(pos); if(tagString.length() == 12) { tags.put(i, tagString); } else { System.err.println("ERROR: Word.java\n\taddTags: incorrect tagString"); } count++; } /** * Tests a Word object for equality, based on the word string. * @param w The word to be compared to the current object. * @return True if the Word objects are equal. False otherwise. */ public boolean equals(Word w) { return word.equals(w.getWord()); } // /* // * Adds a new position of the word on the page. // * @param pos The position of the word. // */ // public void addPosition(int pos) // { // Integer i = new Integer(pos); // positions.add(i); // count++; // } /** * Returns the number of instances of the word. */ public int getCount() { return count; } /** * Returns the word itself. * @return The word string. */ public String getWord() { return word; } /** * Returns an iterator to the Set of tags. * The Set will consist of integer positions of the word on the * page. The values associated with the keys in the HashMap * are the tag bitstrings associated with the word at the * position. */ public Iterator getDetails() { Iterator it = tags.keySet().iterator(); return it; } public String posToTags(Integer poskey) //returns the tag string from the HashMap of the word object { return (String) tags.get( poskey ); } }