/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.commons.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.text.matcher.StringMatcherFactory;
022
/**
 * A matcher that can be queried to determine whether a portion of a character
 * array matches.
 * <p>
 * Ready-made matchers are available through the static factory methods.
 * If these do not suffice, subclass this type and implement
 * {@link #isMatch(char[], int, int, int)}.
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Returns a matcher which matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Returns a matcher which matches the String trim() whitespace characters,
     * that is every character whose value is 32 (space) or lower.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the array
     * passed in is never modified.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]; the shared none matcher when
     *  {@code chars} is null or empty
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        return chars.length == 1 ? new CharMatcher(chars[0]) : new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters; the shared none matcher when
     *  {@code chars} is null or empty
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        return chars.length() == 1
                ? new CharMatcher(chars.charAt(0))
                : new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String; the shared none matcher when
     *  {@code str} is null or empty
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This is a convenience overload that treats the whole array as the active
     * area: it delegates to {@link #isMatch(char[], int, int, int)} with a start
     * of {@code 0} and an end of {@code buffer.length}.
     * <p>
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied before sorting, so the caller's array is left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the character at {@code pos} is one of the stored characters.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is in the set, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at {@code pos} equals the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match a fixed string.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the text starting at {@code pos} matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it matches, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining room rather than computing pos + len,
            // so the check cannot wrap around for valid (non-negative) indices.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a string representation of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the character at {@code pos} has a value of 32 (space) or lower.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is trim() whitespace, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}