001    // =================================================================================================
002    // Copyright 2011 Twitter, Inc.
003    // -------------------------------------------------------------------------------------------------
004    // Licensed under the Apache License, Version 2.0 (the "License");
005    // you may not use this work except in compliance with the License.
006    // You may obtain a copy of the License in the LICENSE file, or at:
007    //
008    //  http://www.apache.org/licenses/LICENSE-2.0
009    //
010    // Unless required by applicable law or agreed to in writing, software
011    // distributed under the License is distributed on an "AS IS" BASIS,
012    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013    // See the License for the specific language governing permissions and
014    // limitations under the License.
015    // =================================================================================================
016    
017    package com.twitter.common.text.filter;
018    
019    import java.util.Set;
020    
021    import com.google.common.base.Preconditions;
022    import com.google.common.collect.Sets;
023    
024    import com.twitter.common.text.token.TokenStream;
025    import com.twitter.common.text.token.attribute.TokenType;
026    import com.twitter.common.text.token.attribute.TokenTypeAttribute;
027    
028    /**
029     * Either filters out or retains tokens of a specified type(s). If the mode is {@code Mode.ACCEPT},
030     * tokens are retained. If the mode is {@code Mode.REJECT}, tokens are filtered out.
031     */
032    public class TokenTypeFilter extends TokenFilter {
033      public enum Mode { ACCEPT, REJECT };
034    
035      private final TokenTypeAttribute typeAttr;
036    
037      private Set<TokenType> types = Sets.newHashSet();
038      private Mode mode = Mode.ACCEPT;
039    
040      protected TokenTypeFilter(TokenStream inputStream) {
041        super(inputStream);
042        typeAttr = inputStream.getAttribute(TokenTypeAttribute.class);
043      }
044    
045      protected void setTypesToFilter(TokenType... types) {
046        Preconditions.checkNotNull(types);
047        for (TokenType type : types) {
048          Preconditions.checkNotNull(type);
049          this.types.add(type);
050        }
051      }
052    
053      protected void setMode(Mode mode) {
054        this.mode = mode;
055      }
056    
057      @Override
058      public boolean acceptToken() {
059        boolean match = types.contains(typeAttr.getType());
060        if (mode == Mode.REJECT) {
061          match = !match;
062        }
063    
064        return match;
065      }
066    
067      public static final class Builder {
068        private TokenTypeFilter filter;
069    
070        public Builder(TokenStream inputStream) {
071          filter = new TokenTypeFilter(inputStream);
072        }
073    
074        /**
075         * Sets token types to accept or filter.
076         *
077         * @param types token types to accept or filter
078         * @return this {@code Builder} object
079         */
080        public Builder setTypesToFilter(TokenType... types) {
081          filter.setTypesToFilter(types);
082          return this;
083        }
084    
085        /**
086         * Selects whether to accept tokens of the
087         * specified types or reject them.
088         *
089         * @param mode {@code Mode.ACCEPT} or {@code Mode.REJECT}
090         * @return this {@code Builder} object
091         */
092        public Builder setMode(Mode mode) {
093          filter.setMode(mode);
094          return this;
095        }
096    
097        public TokenTypeFilter build() {
098          return filter;
099        }
100      }
101    }