com.twitter.common.text.detector
Class PunctuationDetector

java.lang.Object
  extended by org.apache.lucene.util.AttributeSource
      extended by com.twitter.common.text.token.TokenStream
          extended by com.twitter.common.text.token.TokenProcessor
              extended by com.twitter.common.text.detector.RegexDetector
                  extended by com.twitter.common.text.detector.PunctuationDetector

public class PunctuationDetector
extends RegexDetector

Updates the TokenTypeAttribute of a token to TokenType.PUNCTUATION if the token is identified as punctuation.


Nested Class Summary
static class PunctuationDetector.AbstractBuilder<N extends PunctuationDetector,T extends PunctuationDetector.AbstractBuilder<N,T>>
           
static class PunctuationDetector.Builder
           
 
Nested classes/interfaces inherited from class org.apache.lucene.util.AttributeSource
org.apache.lucene.util.AttributeSource.AttributeFactory, org.apache.lucene.util.AttributeSource.State
 
Field Summary
static String PUNCTUATION_CHAR_CLASS
           
static String PUNCTUATION_REGEX
           
static String SPACE_CHAR_CLASS
           
static String SPACE_REGEX
           
 
Constructor Summary
protected PunctuationDetector(TokenStream inputStream)
           
 
Method Summary
 
Methods inherited from class com.twitter.common.text.detector.RegexDetector
incrementToken, setRegexPattern, setType
 
Methods inherited from class com.twitter.common.text.token.TokenProcessor
getInputStream, getInstanceOf, reset
 
Methods inherited from class com.twitter.common.text.token.TokenStream
toStringList
 
Methods inherited from class org.apache.lucene.util.AttributeSource
addAttribute, addAttributeImpl, captureState, clearAttributes, cloneAttributes, equals, getAttribute, getAttributeClassesIterator, getAttributeFactory, getAttributeImplsIterator, hasAttribute, hasAttributes, hashCode, restoreState, toString
 
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
 

Field Detail

SPACE_CHAR_CLASS

public static final String SPACE_CHAR_CLASS
See Also:
Constant Field Values

SPACE_REGEX

public static final String SPACE_REGEX
See Also:
Constant Field Values

PUNCTUATION_CHAR_CLASS

public static final String PUNCTUATION_CHAR_CLASS
See Also:
Constant Field Values

PUNCTUATION_REGEX

public static final String PUNCTUATION_REGEX
See Also:
Constant Field Values

Constructor Detail

PunctuationDetector

protected PunctuationDetector(TokenStream inputStream)