I'm working on an open-source grammar tool for OpenOffice, and one of the Java-based rules I want to modify has a problem I can't figure out. This Java rule should check the whole document for words that are listed in the coherency.txt file, but instead it behaves as if it resets at every paragraph.
The purpose of this rule is to make sure the document is consistent in its spelling of words that can be spelled in multiple ways.

Any help you can give would be great - thanks!

package de.danielnaber.languagetool.rules.de;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.ResourceBundle;
import de.danielnaber.languagetool.AnalyzedSentence;
import de.danielnaber.languagetool.AnalyzedToken;
import de.danielnaber.languagetool.AnalyzedTokenReadings;
import de.danielnaber.languagetool.JLanguageTool;
import de.danielnaber.languagetool.rules.Category;
import de.danielnaber.languagetool.rules.RuleMatch;
/**
 * A rule that matches words for which two different spellings are used
 * throughout the document. Currently only implemented for German. Loads
 * the relevant words from <code>rules/de/coherency.txt</code>.
 *
 * <p>Note that this should not be used for language variations like
 * American English vs. British English or German "alte Rechtschreibung"
 * vs. "neue Rechtschreibung" -- that's the task of a spell checker.
 *
 * @author Daniel Naber
 */
public class WordCoherencyRule extends GermanRule {
  private static final String FILE_NAME = "/de/coherency.txt";
  private static final String FILE_ENCODING = "utf-8";
  private final Map<String, String> relevantWords;        // e.g. "aufwendig -> aufwändig"
  private Map<String, RuleMatch> shouldNotAppearWord = new HashMap<String, RuleMatch>();  // e.g. aufwändig -> RuleMatch of aufwendig
  private final GermanLemmatizer germanLemmatizer;
  public WordCoherencyRule(ResourceBundle messages) throws IOException {
    if (messages != null)
      super.setCategory(new Category(messages.getString("category_misc")));
    relevantWords = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILE_NAME)); 
    germanLemmatizer = new GermanLemmatizer();
  public String getId() {
    return "DE_WORD_COHERENCY";
  public String getDescription() {
    return "Einheitliche Schreibweise für Wörter mit mehr als einer korrekten Schreibweise";
  public RuleMatch[] match(AnalyzedSentence text) {
    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
    final AnalyzedTokenReadings[] tokens = text.getTokens();
    int pos = 0;
    for (AnalyzedTokenReadings tmpToken : tokens) {
      //TODO: definitely should be changed
      //if the general lemmatizer is working
      //defaulting to the first element because the
      //general German lemmatizer is not (yet) there
      String token = tmpToken.getToken();
      if (tmpToken.isWhitespace()) {
        // ignore
      } else {
        final String origToken = token;
        final List<AnalyzedToken> readings = tmpToken.getReadings();
        // TODO: in theory we need to care about the other readings, too:
        if (readings != null && readings.size() > 0) {
          final String baseform = readings.get(0).getLemma();
          if (baseform != null) {
            token = baseform;
          } else {
            // not all words are known by the Tagger (esp. compounds), so use the
            // file lookup:
            final String manualLookup = germanLemmatizer.getBaseform(origToken);
            if (manualLookup != null)
              token = manualLookup;
        if (shouldNotAppearWord.containsKey(token)) {
          final RuleMatch otherMatch = shouldNotAppearWord.get(token);
          final String otherSpelling = otherMatch.getMessage();
          final String msg = "'" + token + "' und '" + otherSpelling +
                  "' sollten nicht gleichzeitig benutzt werden";
          final RuleMatch ruleMatch = new RuleMatch(this, pos, pos + origToken.length(), msg);
        } else if (relevantWords.containsKey(token)) {
          final String shouldNotAppear = relevantWords.get(token);
          // only used to display this spelling variation if the other one really occurs:
          final RuleMatch potentialRuleMatch = new RuleMatch(this, pos, pos + origToken.length(), token);
          shouldNotAppearWord.put(shouldNotAppear, potentialRuleMatch);
      pos += tmpToken.getToken().length();
    return toRuleMatchArray(ruleMatches);
  private Map<String, String> loadWords(InputStream file) throws IOException {
    final Map<String, String> map = new HashMap<String, String>();
    InputStreamReader isr = null;
    BufferedReader br = null;
    try {
      isr = new InputStreamReader(file, FILE_ENCODING);
      br = new BufferedReader(isr);
      String line;
      while ((line = br.readLine()) != null) {
        line = line.trim();
        if (line.length() < 1) {
        if (line.charAt(0) == '#') {      // ignore comments
        final String[] parts = line.split(";");
        if (parts.length != 2) {
          throw new IOException("Format error in file " + JLanguageTool.getDataBroker().getFromRulesDirAsUrl(FILE_NAME) + ", line: " + line);
        map.put(parts[0], parts[1]);
        map.put(parts[1], parts[0]);
    } finally {
      if (br != null) br.close();
      if (isr != null) isr.close();
    return map;
  public void reset() {
    shouldNotAppearWord = new HashMap<String, RuleMatch>();