"""File: word_list.py Purpose: Build a list of the words on a web page that that has a simple format. Run: python word_list.py Input: name of html file Output: list of the words on the page Note: Each line of the html file is a tag or a word. """ #---------------------------------------------------------------------- def get_words(ifile): """Extract words from an html page""" wlist = [] for line in ifile: line = line.strip() # Get rid of leading and trailing whitespace if len(line) > 0: # If this is a nonempty line . . . if line[0] != '<' and not line in wlist: wlist.append(line) return wlist #---------------------------------------------------------------------- def print_words(filename, word_list): """Print the words in a list""" print "The words in", filename, "are:" print " ", for word in word_list: print word, print #---------------------------------------------------------------------- filename = raw_input("What's the name of the HTML file?\n ") ifile = open(filename, "r") word_list = get_words(ifile) print_words(filename, word_list) ifile.close()