/* Use, modification, and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1.0.txt or copy at www.boost.org/LICENSE_1.0.txt) */ // Page Class // This class stores information about a specific webpage. // It contains a vector of Word objects and a vector of links. import java.util.*; /** * This class stores information about a webpage. It uses HashMap objects to * store the {@link Link} and {@link Word} objects that occur on the page. * @author Aaron Miller, Mike Ottum * @version 1.7 */ public class Page { protected HashMap linkList; protected HashMap wordList; protected String pageURL; protected boolean index; protected boolean follow; protected long checkSum; protected String header; protected String text; public void addText(String newText) { text = text + newText; if (text.charAt(text.length()-1) != ' ') { text = text.concat(" "); } } public String getText() { return text; } public void addHeader(String newHeader) { header = header + newHeader; if (header.charAt(header.length()-1) != ' ') { header = header.concat(" "); } } public String getHeader() { return header; } /** * Initializes the url string associated with the page and creates the * HashMaps. * @param url The absolute url of the webpage. */ public Page(String url) { pageURL = new String(url); linkList = new HashMap(); wordList = new HashMap(); index = true; follow = true; header = ""; text = ""; } /** * Sets the noIndex boolean value. If noIndex is true, then don't index * this page. If it is false, then do. * @param set Value to set noIndex equal to. */ public void setNoIndex(boolean set) { index = !set; } /** * Sets the noFollow boolean value. If noFollow is true, then do not * crawl the links on this page. If it is false, then do. * @param set Value to set noFollow equal to. */ public void setNoFollow(boolean set) { follow = !set; } /** * Tests a Page object for equality with the current Page. The test uses * the pageURL member variable as the equality test. Thus, if two pages * have the same URL, they are equal. * @param p The Page object to compare to the current Page. * @return True if the Pages are equal. False otherwise. */ public boolean equals(Page p) { return pageURL.equals(p.getURL()); } /** * Returns the current Page objects URL. * @return The pageURL member variable. */ public String getURL() { return pageURL; } /** * Adds a specific word and location to the hash of words on the page. * @param s The word string to be added. * @param location The location of the word on the page. * @param tags Tags associated with the string at the given location. */ public void addWord(String s, int location, String tags) { s = s.toLowerCase(); Word w; if(!wordList.containsKey(s)) { w = new Word(s, location); wordList.put(s, w); } else { // put the information into the word object that already // exists w = (Word)(wordList.get(s)); } w.addTags(tags, location); } /** * Adds a specific link and its position on the page to the link hash. * @param url The absolute url of the link. * @param pos The position of the link on the page. */ public void addLink(String url, int pos) { Link l; if(!linkList.containsKey(url)) { l = new Link(url, getURL(), pos); linkList.put(url,l); } else { l = (Link)(linkList.get(url)); l.addPosition(pos); } } /** * Returns an {@link Iterator} to the {@link Set} of URLs contained * on the page. */ public Iterator getLinks() // returns an iterator to a Set of Strings of links. { Iterator it = linkList.keySet().iterator(); return it; } /** * Returns an {@link Iterator} to the {@link Set} of words contained * on the page. */ public Iterator getWords() // returns an iterator to a Set of Strings of words. { Iterator it = wordList.keySet().iterator(); return it; } public Word keyToWord(String wordkey) //returns the word object from the HashMap { return (Word) wordList.get( wordkey ); } public Link keyToLink(String linkey) //returns the link object from the hashmap { return (Link) linkList.get( linkey ); } public void setCheckSum(long check) { checkSum = check; } }