Cloudgenerator

import java.util.
Comparator;
import components.map.Map;
import components.map.Map.Pair;
import components.map.Map2;
import components.simplereader.SimpleReader;
import components.simplereader.SimpleReader1L;
import components.simplewriter.SimpleWriter;
import components.simplewriter.SimpleWriter1L;
import components.sortingmachine.SortingMachine;
import components.sortingmachine.SortingMachine2;
/**
* A program used to generate a Word Clouds given a file input.
*
* @author Derek Stevens
* @author Jason Tysl
*
*/
public class TagCloudGenerator {
/**
* Definition of whitespace separators.
*/
private static final String SEPARATORS = " \t\n\r,-.!?[]';:/()";
/**
* Comparator used to sort strings alphabetically.
*
* @author Derek Stevens
* @author Jason Tysl
*
*/
private static class KeyLT
implements Comparator<Map.Pair<String, Integer>> {
@Override
public int compare(Map.Pair<String, Integer> o1,
Map.Pair<String, Integer> o2) {
if (o1.key().equals(o2.key())) {
return o1.value().compareTo(o2.value());
}
return o1.key().compareTo(o2.key());
}
}
/**
* Compare {@code Integer}s in numerical order.
*/
private static class ValueLT
implements Comparator<Map.Pair<String, Integer>> {
@Override
public int compare(Map.Pair<String, Integer> o1,
Map.Pair<String, Integer> o2) {
return o2.value().compareTo(o1.value());
}
}
/**
* Returns the first "word" (maximal length string of characters not in
* {@code SEPARATORS}) or "separator string" (maximal length string of
* characters in {@code SEPARATORS}) in the given {@code text} starting at
* the given {@code position}.
*
* @param text
* the {@code String} from which to get the word or separator
* string
* @param position
* the starting index
* @return the first word or separator string found in {@code text} starting
* at index {@code position}
* @requires 0 <= position < |text|
* @ensures <pre>
* nextWordOrSeparator =
* text[position, position + |nextWordOrSeparator|) and
* if entries(text[position, position + 1)) intersection entries(SEPARATORS) = {}
* then
* entries(nextWordOrSeparator) intersection entries(SEPARATORS) = {} and
* (position + |nextWordOrSeparator| = |text| or
* entries(text[position, position + |nextWordOrSeparator| + 1))
* intersection entries(SEPARATORS) /= {})
* else
* entries(nextWordOrSeparator) is subset of entries(SEPARATORS) and
* (position + |nextWordOrSeparator| = |text| or
* entries(text[position, position + |nextWordOrSeparator| + 1))
* is not subset of entries(SEPARATORS))
* </pre>
*/
private static String nextWordOrSeparator(String text, int position) {
StringBuilder firstStringSep = new StringBuilder();
boolean first = SEPARATORS.indexOf(text.charAt(position)) >= 0;
int finish = position + 1;
while (finish < text.length()
&& first == SEPARATORS.indexOf(text.charAt(finish)) >= 0) {
finish++;
}
return text.substring(position, finish);
}
/**
* Outputs header for the HTML file in {@code fOut}.
*
* @param fOut
* HTML output destination
* @param numWords
* the number of words in the cloud
* @param fName
* the name of the file
* @requires fIn is open
*/
public static void outputHeader(SimpleWriter fOut, int numWords,
String fName) {
fOut.println("<html>");
//prints head
fOut.println("\t <head>");
//creates class for changing color when the mouse hovers
fOut.println("\t\t<title>" + "Top " + numWords + " words in " + fName
+ "</title>");
fOut.println(
"<link href=\"http://web.cse.ohio-state.edu/software/2231/web-
sw2/assignments/projects/tag-cloud-generator/data/tagcloud.css\" rel=\"stylesheet\"
type=\"text/css\">");
fOut.println("\t </head>");
//prints title of body
fOut.println("\t" + "<body>");
fOut.println("\t\t<h2>" + "Top " + numWords + " words in " + fName
+ "</h2>");
fOut.println("\t\t<hr>");
fOut.println("<div class=\"cdiv\">");
fOut.println("<p class=\"cbox\">");
}
/**
* Outputs the tag cloud of the map that changes their size depending on
* their value.
*
* @param map
* - map with words and frequencies as keys and values
* @param wordOrder
* - SortingMachine to determine what order the words come out in
* @param out
* - file to write out to
*/
public static void outputCloud(Map<String, Integer> map,
SortingMachine<Pair<String, Integer>> wordOrder, SimpleWriter out) {
final int averageFontSize = 20, maxFontSize = 48, minFontSize = 11;
int avg = 0;
for (Map.Pair<String, Integer> pair : map) {
avg += pair.value();
}
if (map.size() == 0) {
avg = 1;
} else {
avg = avg / map.size();
}
while (map.size() > 0) {

Pair<String, Integer> pair = wordOrder.removeFirst();
int size = averageFontSize * pair.value() / avg;

if (size > maxFontSize) {
size = maxFontSize;
} else if (size < minFontSize) {
size = minFontSize;
}
out.println("<span style=\"cursor:default\" class=\"f" + size
+ "\" title=\"count: " + pair.value() + "\">" + pair.key()
+ "</span>");
map.remove(pair.key());
}
}
/**
* Outputs footer for the HTML file in {@code fOut}.
*
* @param fOut
* HTML output destination
* @requires fIn is open
*/
public static void outputFooter(SimpleWriter fOut) {
fOut.println("</p>");
fOut.println("</div>");
fOut.println("</body>");
fOut.println("</html>");
}
/***
* Takes the map replaces it with the {@code numWords} most frequent keys.
*
* @param map
* - unsorted map with all of the words and frequencies
* @param numSort
* - SortingMaching of {@code Integer}s that will sort the map by
* value
* @param numWords
* - the number of words that will be in the Tag Cloud
* @replaces map
* @ensures {@code map} only has the most frequent {@code numWords} words
*/
public static void takeMostCommonWords(Map<String, Integer> map,
SortingMachine<Pair<String, Integer>> numSort, int numWords) {
Map<String, Integer> shortenedMap = map.newInstance();

if (numWords < map.size()) {
for (int i = 0; i < numWords; i++) {
Pair<String, Integer> pair = numSort.removeFirst();
shortenedMap.add(pair.key(), pair.value());
}
map.clear();
map.transferFrom(shortenedMap);
}
}
public static void buildSortingMachineFromMap(Map<String, Integer> map,

SortingMachine<Pair<String, Integer>> machine) {
for (Pair<String, Integer> pair : map) {
machine.add(pair);
}
machine.changeToExtractionMode();
}
/**
* Generates a map of words mapped to the number of occurrences.
*
* @param fIn
* the file to read words from
* @requires fIn is Open
* @return a Map of words as keys and values as counts
*/
public static void buildOccurrenceMap(SimpleReader fIn,
Map<String, Integer> countMap) {
int position = 0;
while (!fIn.atEOS()) {
String line = fIn.nextLine();
line = line.toLowerCase();
//looping through whole line
/*
* can just initialize position here and we wont have to reset it
*/
while (position < line.length()) {
String nextWord = nextWordOrSeparator(line, position);
if (SEPARATORS.indexOf(line.charAt(position)) == -1) {
//checking if word exists in map and treating accordingly
if (countMap.hasKey(nextWord)) {
int val = countMap.value(nextWord);
val++;
countMap.replaceValue(nextWord, val);
} else {
countMap.add(nextWord, 1);
}
}
position += nextWord.length();
}
//reseting position to 0 at the end of the line
position = 0;
}
}
/**
* Main method used for getting user input.
*
* @param args
*/
public static void main(String[] args) {
SimpleReader consoleIn = new SimpleReader1L();
SimpleWriter consoleOut = new SimpleWriter1L();
consoleOut.print("Welcome to the Tag cloud generator.");

consoleOut.println("Enter the file to read words from: ");
String fInName = consoleIn.nextLine();
consoleOut.println("Enter the file to output html to: ");
String fOutName = consoleIn.nextLine();
consoleOut.println(
"Enter the number of words you would like in the cloud: ");
int numWords = consoleIn.nextInteger();
SimpleReader fIn = new SimpleReader1L(fInName);
SimpleWriter fOut = new SimpleWriter1L(fOutName);
//building data representations needed

KeyLT keyOrder = new KeyLT();
ValueLT valOrder = new ValueLT();
/*
* sortingmachine2 uses selection sort which is better when we don't
* know how many we want to extract
*/
SortingMachine<Map.Pair<String, Integer>> keySort = new SortingMachine2<>(
keyOrder);
SortingMachine<Map.Pair<String, Integer>> valSort = new SortingMachine2<>(
valOrder);
Map<String, Integer> tagMap = new Map2<>();
buildOccurrenceMap(fIn, tagMap);
buildSortingMachineFromMap(tagMap, valSort);
takeMostCommonWords(tagMap, valSort, numWords);
buildSortingMachineFromMap(tagMap, keySort);
//outputting HTML
outputHeader(fOut, numWords, fInName);
outputCloud(tagMap, keySort, fOut);
outputFooter(fOut);
consoleIn.close();
consoleOut.close();
}
}

Cloudgenerator

Încărcat de

Informații document

Titlu original

Drepturi de autor

Formate disponibile

Partajați acest document

Partajați sau inserați document

Opțiuni de partajare

Vi se pare util acest document?

Este necorespunzător acest conținut?

Drepturi de autor:

Formate disponibile

Cloudgenerator

Încărcat de

Drepturi de autor:

Formate disponibile

import java.util.

while (map.size() > 0) {

int size = averageFontSize * pair.value() / avg;

Map<String, Integer> shortenedMap = map.newInstance();

public static void buildSortingMachineFromMap(Map<String, Integer> map,

consoleOut.print("Welcome to the Tag cloud generator.");

//building data representations needed

S-ar putea să vă placă și