// [Page marker left by the document-hosting site: "You are on page 1 of 6"]

import java.util.Comparator;
import java.util.Locale;

import components.map.Map;
import components.map.Map.Pair;
import components.map.Map2;
import components.simplereader.SimpleReader;
import components.simplereader.SimpleReader1L;
import components.simplewriter.SimpleWriter;
import components.simplewriter.SimpleWriter1L;
import components.sortingmachine.SortingMachine;
import components.sortingmachine.SortingMachine2;

/**
* A program that generates an HTML tag cloud from the words in a text file.
*
* @author Derek Stevens
* @author Jason Tysl
*
*/
public class TagCloudGenerator {

/**
* Characters that separate words: whitespace plus common punctuation.
*/
private static final String SEPARATORS = " \t\n\r,-.!?[]';:/()";

/**
 * Orders word/count pairs alphabetically by word (the key). When two pairs
 * carry the same word, the pair with the smaller count (the value) comes
 * first.
 *
 * @author Derek Stevens
 * @author Jason Tysl
 *
 */
private static class KeyLT
        implements Comparator<Map.Pair<String, Integer>> {
    @Override
    public int compare(Map.Pair<String, Integer> o1,
            Map.Pair<String, Integer> o2) {
        String firstWord = o1.key();
        String secondWord = o2.key();
        // Different words decide the ordering on their own.
        if (!firstWord.equals(secondWord)) {
            return firstWord.compareTo(secondWord);
        }
        // Same word: break the tie on the count.
        return o1.value().compareTo(o2.value());
    }
}

/**
 * Orders word/count pairs by count (the value) in descending order, so the
 * pair with the largest count comes first.
 */
private static class ValueLT
        implements Comparator<Map.Pair<String, Integer>> {
    @Override
    public int compare(Map.Pair<String, Integer> o1,
            Map.Pair<String, Integer> o2) {
        int leftCount = o1.value();
        int rightCount = o2.value();
        // Operands are deliberately swapped: a larger count sorts earlier.
        return Integer.compare(rightCount, leftCount);
    }
}

/**
 * Returns the first "word" (maximal length string of characters not in
 * {@code SEPARATORS}) or "separator string" (maximal length string of
 * characters in {@code SEPARATORS}) in the given {@code text} starting at
 * the given {@code position}.
 *
 * @param text
 *            the {@code String} from which to get the word or separator
 *            string
 * @param position
 *            the starting index
 * @return the first word or separator string found in {@code text} starting
 *         at index {@code position}; never empty
 * @requires 0 <= position < |text|
 * @ensures <pre>
 * nextWordOrSeparator =
 *   text[position, position + |nextWordOrSeparator|)  and
 * if entries(text[position, position + 1)) intersection entries(SEPARATORS) = {}
 * then
 *   entries(nextWordOrSeparator) intersection entries(SEPARATORS) = {}  and
 *   (position + |nextWordOrSeparator| = |text|  or
 *    entries(text[position, position + |nextWordOrSeparator| + 1))
 *      intersection entries(SEPARATORS) /= {})
 * else
 *   entries(nextWordOrSeparator) is subset of entries(SEPARATORS)  and
 *   (position + |nextWordOrSeparator| = |text|  or
 *    entries(text[position, position + |nextWordOrSeparator| + 1))
 *      is not subset of entries(SEPARATORS))
 * </pre>
 */
private static String nextWordOrSeparator(String text, int position) {
    // The first character decides whether this run is a separator run or a
    // word run; the run continues while later characters are the same kind.
    final boolean startsWithSeparator = SEPARATORS
            .indexOf(text.charAt(position)) >= 0;
    int end = position + 1;
    while (end < text.length()
            && (SEPARATORS.indexOf(text.charAt(end)) >= 0) == startsWithSeparator) {
        end++;
    }
    return text.substring(position, end);
}

/**
* Outputs header for the HTML file in {@code fOut}.
*
* @param fOut
* HTML output destination
* @param numWords
* the number of words in the cloud
* @param fName
* the name of the file
* @requires fIn is open
*/
public static void outputHeader(SimpleWriter fOut, int numWords,
String fName) {
fOut.println("<html>");
//prints head
fOut.println("\t <head>");
//creates class for changing color when the mouse hovers
fOut.println("\t\t<title>" + "Top " + numWords + " words in " + fName
+ "</title>");
fOut.println(
"<link href=\"http://web.cse.ohio-state.edu/software/2231/web-
sw2/assignments/projects/tag-cloud-generator/data/tagcloud.css\" rel=\"stylesheet\"
type=\"text/css\">");
fOut.println("\t </head>");
//prints title of body
fOut.println("\t" + "<body>");
fOut.println("\t\t<h2>" + "Top " + numWords + " words in " + fName
+ "</h2>");
fOut.println("\t\t<hr>");
fOut.println("<div class=\"cdiv\">");
fOut.println("<p class=\"cbox\">");
}

/**
 * Writes one {@code <span>} per word to {@code out}, in the order the words
 * come out of {@code wordOrder}. Each span's CSS class is {@code f<size>},
 * where size is {@code 20 * count / averageCount} clamped to the range
 * [11, 48], so more frequent words are rendered larger. The average count
 * is taken over the entries of {@code map}.
 *
 * @param map
 *            map with words and frequencies as keys and values
 * @param wordOrder
 *            SortingMachine that determines the order the words come out
 *            in; expected to be in extraction mode and to hold the same
 *            pairs as {@code map}
 * @param out
 *            file to write out to
 * @updates map, wordOrder
 * @ensures every pair removed from {@code wordOrder} has been written to
 *          {@code out} and its word removed from {@code map}
 */
public static void outputCloud(Map<String, Integer> map,
        SortingMachine<Pair<String, Integer>> wordOrder, SimpleWriter out) {
    final int averageFontSize = 20, maxFontSize = 48, minFontSize = 11;

    // Sum in a long so a very large input cannot overflow the total.
    long total = 0;
    for (Map.Pair<String, Integer> pair : map) {
        total += pair.value();
    }
    // Average count, forced to at least 1 so it is always a safe divisor
    // (empty map, or a map whose counts average below 1).
    long avg = 1;
    if (map.size() > 0) {
        avg = Math.max(1, total / map.size());
    }

    // Stop as soon as either collection runs out, so removeFirst is never
    // called on an empty machine even if the two arguments disagree.
    while (map.size() > 0 && wordOrder.size() > 0) {
        Pair<String, Integer> pair = wordOrder.removeFirst();

        // Scale relative to the average, then clamp to the supported sizes.
        long scaled = (long) averageFontSize * pair.value() / avg;
        int size = (int) Math.max(minFontSize,
                Math.min(maxFontSize, scaled));

        out.println("<span style=\"cursor:default\" class=\"f" + size
                + "\" title=\"count: " + pair.value() + "\">"
                + escapeHtml(pair.key()) + "</span>");

        // Only remove words the map actually holds.
        if (map.hasKey(pair.key())) {
            map.remove(pair.key());
        }
    }
}

/**
 * Replaces the characters that are special in HTML text ({@code &},
 * {@code <}, {@code >} and {@code "}) with their entity references, so a
 * word taken from the input file cannot be interpreted as markup.
 *
 * @param text
 *            the text to escape
 * @return {@code text} with the special characters replaced
 */
private static String escapeHtml(String text) {
    StringBuilder escaped = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            default:
                escaped.append(c);
                break;
        }
    }
    return escaped.toString();
}

/**
 * Writes the closing part of the HTML page to {@code fOut}: the closing
 * tags for the paragraph, the div, the body and the document, in that
 * order.
 *
 * @param fOut
 *            HTML output destination
 * @requires fOut is open
 */
public static void outputFooter(SimpleWriter fOut) {
    // Innermost element first, one tag per output line.
    final String[] closingTags = { "</p>", "</div>", "</body>", "</html>" };
    for (String tag : closingTags) {
        fOut.println(tag);
    }
}

/**
 * Shrinks {@code map} to the {@code numWords} pairs that come out of
 * {@code numSort} first. If {@code map} already holds {@code numWords}
 * entries or fewer, neither argument is changed.
 *
 * @param map
 *            unsorted map with all of the words and frequencies
 * @param numSort
 *            SortingMachine holding the pairs of {@code map}; expected to
 *            be in extraction mode and ordered so the most frequent words
 *            are removed first
 * @param numWords
 *            the number of words that will be in the Tag Cloud
 * @updates map, numSort
 * @ensures {@code map} only has the most frequent {@code numWords} words
 */
public static void takeMostCommonWords(Map<String, Integer> map,
        SortingMachine<Pair<String, Integer>> numSort, int numWords) {
    // Nothing to trim: the map is already small enough.
    if (numWords >= map.size()) {
        return;
    }

    Map<String, Integer> shortenedMap = map.newInstance();
    // The size check keeps removeFirst from being called on an empty
    // machine if numSort holds fewer pairs than requested.
    for (int i = 0; i < numWords && numSort.size() > 0; i++) {
        Pair<String, Integer> pair = numSort.removeFirst();
        shortenedMap.add(pair.key(), pair.value());
    }
    // transferFrom replaces the old contents of map, so a separate
    // clear() beforehand is not needed.
    map.transferFrom(shortenedMap);
}

/**
 * Adds every pair of {@code map} to {@code machine} and then switches
 * {@code machine} to extraction mode. {@code map} itself is not modified.
 *
 * @param map
 *            the word/count pairs to load
 * @param machine
 *            the SortingMachine to fill; presumably must still be in
 *            insertion mode when this is called
 * @updates machine
 */
public static void buildSortingMachineFromMap(Map<String, Integer> map,
        SortingMachine<Pair<String, Integer>> machine) {
    java.util.Iterator<Pair<String, Integer>> entries = map.iterator();
    while (entries.hasNext()) {
        machine.add(entries.next());
    }
    // All pairs are in; from here on the machine only hands them back out.
    machine.changeToExtractionMode();
}
/**
 * Reads {@code fIn} line by line until end of stream and records in
 * {@code countMap} how many times each word occurs. Lines are lower-cased
 * before being split, so counting is case-insensitive. Counts are added to
 * whatever {@code countMap} already contains.
 *
 * @param fIn
 *            the input to read words from
 * @param countMap
 *            the map of words (keys) to occurrence counts (values) to update
 * @updates fIn, countMap
 * @requires fIn is open
 */
public static void buildOccurrenceMap(SimpleReader fIn,
        Map<String, Integer> countMap) {
    while (!fIn.atEOS()) {
        // Locale.ROOT keeps lower-casing independent of the default locale
        // of the machine running the program.
        String line = fIn.nextLine().toLowerCase(Locale.ROOT);

        // Walk the line one word or separator run at a time.
        int position = 0;
        while (position < line.length()) {
            String token = nextWordOrSeparator(line, position);
            // A run is a word exactly when its first character is not a
            // separator; separator runs are skipped.
            if (SEPARATORS.indexOf(line.charAt(position)) == -1) {
                if (countMap.hasKey(token)) {
                    countMap.replaceValue(token, countMap.value(token) + 1);
                } else {
                    countMap.add(token, 1);
                }
            }
            position += token.length();
        }
    }
}

/**
 * Prompts for an input file name, an output file name and a word count,
 * then writes an HTML tag cloud of the most frequent words of the input
 * file to the output file.
 *
 * @param args
 *            the command line arguments; unused
 */
public static void main(String[] args) {
    SimpleReader consoleIn = new SimpleReader1L();
    SimpleWriter consoleOut = new SimpleWriter1L();

    // println, so the first prompt starts on its own line.
    consoleOut.println("Welcome to the Tag cloud generator.");

    consoleOut.println("Enter the file to read words from: ");
    String fInName = consoleIn.nextLine();
    consoleOut.println("Enter the file to output html to: ");
    String fOutName = consoleIn.nextLine();
    consoleOut.println(
            "Enter the number of words you would like in the cloud: ");
    int numWords = consoleIn.nextInteger();
    SimpleReader fIn = new SimpleReader1L(fInName);
    SimpleWriter fOut = new SimpleWriter1L(fOutName);

    //building data representations needed
    KeyLT keyOrder = new KeyLT();
    ValueLT valOrder = new ValueLT();
    /*
     * NOTE(review): the original authors chose SortingMachine2 on the
     * grounds that it uses selection sort, which suits extracting an
     * unknown number of elements; that cannot be confirmed from this file.
     */
    SortingMachine<Map.Pair<String, Integer>> keySort = new SortingMachine2<>(
            keyOrder);
    SortingMachine<Map.Pair<String, Integer>> valSort = new SortingMachine2<>(
            valOrder);
    Map<String, Integer> tagMap = new Map2<>();

    // Count words, keep the most frequent ones, then order them by word.
    buildOccurrenceMap(fIn, tagMap);
    buildSortingMachineFromMap(tagMap, valSort);
    takeMostCommonWords(tagMap, valSort, numWords);
    buildSortingMachineFromMap(tagMap, keySort);

    // Report the number of words actually in the cloud, which is smaller
    // than the request when the input has fewer distinct words. Captured
    // here because outputCloud empties tagMap.
    int cloudSize = tagMap.size();

    //outputting HTML
    outputHeader(fOut, cloudSize, fInName);
    outputCloud(tagMap, keySort, fOut);
    outputFooter(fOut);

    // Close the file streams as well as the console streams; closing the
    // output file ensures the generated HTML is fully written.
    fIn.close();
    fOut.close();
    consoleIn.close();
    consoleOut.close();
}
}

// [Footer left by the document-hosting site: "You might also like"]