-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathDissociatedPress.java
More file actions
133 lines (123 loc) · 5.18 KB
/
DissociatedPress.java
File metadata and controls
133 lines (123 loc) · 5.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.Scanner;
/**
 * A character-level "Dissociated Press" text generator.
 *
 * <p>Training ({@link #processChar(char)}) records, for every suffix of the current
 * pattern, which characters were seen to follow it. Generation ({@link #nextChar(int)})
 * then repeatedly picks a random character from the follow string of the longest suffix
 * of the current pattern that has one.
 *
 * <p>Instances are mutable and perform no synchronization.
 */
public class DissociatedPress {

    // Default input for main(); pass a path as the first command-line argument to override.
    private static final String FILENAME = "warandpeace.txt";

    private static final Random rng = new Random();

    // Returned by nextChar when no suffix of the current pattern has a follow string.
    private static final char NO_FOLLOW = '$';

    // Layout of the demonstration output produced by main().
    private static final int SAMPLE_LINES = 10;
    private static final int LINE_WIDTH = 60;
    // Upper bound on characters generated per sample, so that input offering no
    // line-break opportunity (no whitespace, or nothing but whitespace) cannot
    // make the demonstration loop forever.
    private static final int MAX_SAMPLE_CHARS = 100_000;

    // Maps each observed pattern to the characters that followed it, in order seen.
    private final Map<String, String> followMap = new HashMap<>();
    private final int maxPat;
    private final int maxFollow;
    // Invariant: pattern.length() <= maxPat.
    private String pattern;

    /** Creates a generator with initial pattern {@code " "}, maxPat 7 and maxFollow 200. */
    public DissociatedPress() {
        this(" ", 7, 200);
    }

    /**
     * The constructor for DissociatedPress.
     * @param pattern The initial pattern. If it is longer than {@code maxPat}, only its
     * last {@code maxPat} characters are kept, matching how the pattern is trimmed
     * while text is processed.
     * @param maxPat The maximum length for the current pattern; must be positive.
     * @param maxFollow The maximum length for the follow string for any pattern; must
     * be positive.
     * @throws NullPointerException if {@code pattern} is null.
     * @throws IllegalArgumentException if {@code maxPat} or {@code maxFollow} is not positive.
     */
    public DissociatedPress(String pattern, int maxPat, int maxFollow) {
        if (pattern == null) {
            throw new NullPointerException("pattern must not be null");
        }
        if (maxPat < 1) {
            throw new IllegalArgumentException("maxPat must be positive, got " + maxPat);
        }
        if (maxFollow < 1) {
            throw new IllegalArgumentException("maxFollow must be positive, got " + maxFollow);
        }
        // An over-long initial pattern would create map keys longer than maxPat,
        // which outputInfo() cannot tabulate.
        this.pattern = pattern.length() > maxPat
                ? pattern.substring(pattern.length() - maxPat)
                : pattern;
        this.maxPat = maxPat;
        this.maxFollow = maxFollow;
    }

    /**
     * Process the next character of the input text and update the followmap for all
     * the suffixes of the current pattern.
     * @param next The next character of input to process.
     */
    public void processChar(char next) {
        // Add the next character to follow strings of every suffix of current pattern,
        // unless that follow string has already reached maxFollow characters.
        for (int start = 0; start < pattern.length(); start++) {
            String suffix = pattern.substring(start);
            String follow = followMap.getOrDefault(suffix, "");
            if (follow.length() < maxFollow) {
                followMap.put(suffix, follow + next);
            }
        }
        advancePattern(next);
    }

    /**
     * Use the Dissociated Press pattern map to emit a random character based on the
     * current pattern, and update the pattern accordingly.
     *
     * <p>The pattern is first cut down to its last {@code maxEmitPat} characters and
     * then shortened from the front until a suffix with a follow string is found.
     * If no suffix has one, the pattern is left empty and {@code '$'} is returned;
     * further calls keep returning {@code '$'} until {@link #processChar(char)}
     * supplies a new pattern.
     *
     * @param maxEmitPat The maximum pattern length to use in emission, regardless of the
     * patterns stored in the followmap.
     * @return A randomly chosen character from the follow string of the current pattern,
     * or {@code '$'} if no suffix of the pattern has a follow string.
     */
    public char nextChar(int maxEmitPat) {
        while (pattern.length() > maxEmitPat) {
            pattern = pattern.substring(1);
        }
        while (pattern.length() > 0) {
            String follow = followMap.getOrDefault(pattern, "");
            if (follow.length() > 0) {
                char next = follow.charAt(rng.nextInt(follow.length()));
                advancePattern(next);
                return next;
            }
            // Nothing known about this pattern; retry with a shorter suffix.
            pattern = pattern.substring(1);
        }
        return NO_FOLLOW;
    }

    /**
     * Print a summary of the followmap to standard output: the single-character
     * patterns found, and for each pattern length from 1 to maxPat the number of
     * patterns, how many of them have a follow string of exactly maxFollow characters,
     * and the average follow string length. Lengths with no patterns report an
     * average of zero.
     */
    public void outputInfo() {
        StringBuilder characters = new StringBuilder();
        int[] patCount = new int[maxPat + 1];
        int[] followCount = new int[maxPat + 1];
        int[] saturated = new int[maxPat + 1];
        for (Map.Entry<String, String> entry : followMap.entrySet()) {
            String pat = entry.getKey();
            int patLen = pat.length();
            int followLen = entry.getValue().length();
            patCount[patLen]++;
            followCount[patLen] += followLen;
            if (followLen == maxFollow) {
                saturated[patLen]++;
            }
            if (patLen == 1) {
                characters.append(pat);
            }
        }
        System.out.println("Characters found in data are:\n" + characters);
        System.out.println("\nLength\tTotal\tSaturated\tAverage");
        for (int patLen = 1; patLen <= maxPat; patLen++) {
            // Guard the division: 0 / 0.0 would print NaN for unused lengths.
            double average = patCount[patLen] == 0
                    ? 0.0
                    : followCount[patLen] / (double) patCount[patLen];
            System.out.printf("%d\t%d\t%d\t\t%.3f\n", patLen, patCount[patLen],
                    saturated[patLen], average);
        }
        System.out.println("\n");
    }

    /**
     * For demonstration purposes, read in a text file (by default "War and Peace") to
     * be used to build up the followmap. Demonstrate the behaviour of the Dissociated
     * Press technique to produce sample random text for every pattern length from 1
     * up to the generator's maximum pattern length.
     * @param args Optional; if present, the first argument is the path of the text
     * file to read instead of the default.
     * @throws IOException if the file cannot be opened or an error occurs while reading it.
     */
    public static void main(String[] args) throws IOException {
        String filename = args.length > 0 ? args[0] : FILENAME;
        DissociatedPress dp = new DissociatedPress();
        try (Scanner wap = new Scanner(new File(filename))) {
            while (wap.hasNextLine()) {
                String line = wap.nextLine(); // nextLine() strips away newline character
                for (int i = 0; i < line.length(); i++) {
                    dp.processChar(line.charAt(i));
                }
                dp.processChar(' '); // newline works as whitespace for this analysis
            }
            // Scanner hides read failures behind hasNextLine() == false; surface them.
            IOException readError = wap.ioException();
            if (readError != null) {
                throw readError;
            }
        }
        if (dp.followMap.isEmpty()) {
            // Without any training data nextChar can only return the '$' sentinel.
            System.out.println("No text was read from " + filename + "; nothing to generate.");
            return;
        }
        dp.outputInfo();
        for (int maxEmitPat = 1; maxEmitPat <= dp.maxPat; maxEmitPat++) {
            if (maxEmitPat > 1) {
                System.out.println("\n---\n");
            }
            System.out.println("Emit pattern length " + maxEmitPat + ".");
            emitSample(dp, maxEmitPat);
        }
    }

    // Appends next to the pattern, dropping the oldest character if it outgrows maxPat.
    private void advancePattern(char next) {
        pattern += next;
        if (pattern.length() > maxPat) {
            pattern = pattern.substring(1);
        }
    }

    // Prints SAMPLE_LINES lines of generated text, breaking lines at whitespace and
    // collapsing runs of whitespace into a single character.
    private static void emitSample(DissociatedPress dp, int maxEmitPat) {
        int currLineLen = 0;
        int linesRemain = SAMPLE_LINES;
        int budget = MAX_SAMPLE_CHARS;
        char prev = ' ';
        while (linesRemain > 0 && budget-- > 0) {
            char next = dp.nextChar(maxEmitPat);
            if (!(Character.isWhitespace(next) && Character.isWhitespace(prev))) {
                // Break the line at the first whitespace once it is long enough.
                if (currLineLen++ > LINE_WIDTH && Character.isWhitespace(next)) {
                    next = '\n';
                    currLineLen = 0;
                    linesRemain--;
                }
                System.out.print(next);
            }
            prev = next;
        }
        if (linesRemain > 0) {
            // Budget ran out mid-line; finish the line so later output starts cleanly.
            System.out.println();
        }
    }
}