Frames | No Frames |
1: /* Matcher.java -- Instance of a regular expression applied to a char sequence. 2: Copyright (C) 2002, 2004, 2006 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package java.util.regex; 40: 41: import gnu.java.lang.CPStringBuilder; 42: 43: import gnu.java.util.regex.CharIndexed; 44: import gnu.java.util.regex.RE; 45: import gnu.java.util.regex.REMatch; 46: 47: /** 48: * Instance of a regular expression applied to a char sequence. 49: * 50: * @since 1.4 51: */ 52: public final class Matcher implements MatchResult 53: { 54: private Pattern pattern; 55: private CharSequence input; 56: // We use CharIndexed as an input object to the getMatch method in order 57: // that /\G/ (the end of the previous match) may work. The information 58: // of the previous match is stored in the CharIndexed object. 59: private CharIndexed inputCharIndexed; 60: private int position; 61: private int appendPosition; 62: private REMatch match; 63: 64: /** 65: * The start of the region of the input on which to match. 66: */ 67: private int regionStart; 68: 69: /** 70: * The end of the region of the input on which to match. 71: */ 72: private int regionEnd; 73: 74: /** 75: * True if the match process should look beyond the 76: * region marked by regionStart to regionEnd when 77: * performing lookAhead, lookBehind and boundary 78: * matching. 79: */ 80: private boolean transparentBounds; 81: 82: /** 83: * The flags that affect the anchoring bounds. 84: * If {@link #hasAnchoringBounds()} is {@code true}, 85: * the match process will honour the 86: * anchoring bounds: ^, \A, \Z, \z and $. If 87: * {@link #hasAnchoringBounds()} is {@code false}, 88: * the anchors are ignored and appropriate flags, 89: * stored in this variable, are used to provide this 90: * behaviour. 91: */ 92: private int anchoringBounds; 93: 94: Matcher(Pattern pattern, CharSequence input) 95: { 96: this.pattern = pattern; 97: this.input = input; 98: this.inputCharIndexed = RE.makeCharIndexed(input, 0); 99: regionStart = 0; 100: regionEnd = input.length(); 101: transparentBounds = false; 102: anchoringBounds = 0; 103: } 104: 105: /** 106: * @param sb The target string buffer 107: * @param replacement The replacement string 108: * 109: * @exception IllegalStateException If no match has yet been attempted, 110: * or if the previous match operation failed 111: * @exception IndexOutOfBoundsException If the replacement string refers 112: * to a capturing group that does not exist in the pattern 113: */ 114: public Matcher appendReplacement (StringBuffer sb, String replacement) 115: throws IllegalStateException 116: { 117: assertMatchOp(); 118: sb.append(input.subSequence(appendPosition, 119: match.getStartIndex()).toString()); 120: sb.append(RE.getReplacement(replacement, match, 121: RE.REG_REPLACE_USE_BACKSLASHESCAPE)); 122: appendPosition = match.getEndIndex(); 123: return this; 124: } 125: 126: /** 127: * @param sb The target string buffer 128: */ 129: public StringBuffer appendTail (StringBuffer sb) 130: { 131: sb.append(input.subSequence(appendPosition, input.length()).toString()); 132: return sb; 133: } 134: 135: /** 136: * @exception IllegalStateException If no match has yet been attempted, 137: * or if the previous match operation failed 138: */ 139: public int end () 140: throws IllegalStateException 141: { 142: assertMatchOp(); 143: return match.getEndIndex(); 144: } 145: 146: /** 147: * @param group The index of a capturing group in this matcher's pattern 148: * 149: * @exception IllegalStateException If no match has yet been attempted, 150: * or if the previous match operation failed 151: * @exception IndexOutOfBoundsException If the replacement string refers 152: * to a capturing group that does not exist in the pattern 153: */ 154: public int end (int group) 155: throws IllegalStateException 156: { 157: assertMatchOp(); 158: return match.getEndIndex(group); 159: } 160: 161: public boolean find () 162: { 163: boolean first = (match == null); 164: if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 165: match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds); 166: else 167: match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 168: position, anchoringBounds); 169: if (match != null) 170: { 171: int endIndex = match.getEndIndex(); 172: // Are we stuck at the same position? 173: if (!first && endIndex == position) 174: { 175: match = null; 176: // Not at the end of the input yet? 177: if (position < input.length() - 1) 178: { 179: position++; 180: return find(position); 181: } 182: else 183: return false; 184: } 185: position = endIndex; 186: return true; 187: } 188: return false; 189: } 190: 191: /** 192: * @param start The index to start the new pattern matching 193: * 194: * @exception IndexOutOfBoundsException If the replacement string refers 195: * to a capturing group that does not exist in the pattern 196: */ 197: public boolean find (int start) 198: { 199: if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 200: match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds); 201: else 202: match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 203: start, anchoringBounds); 204: if (match != null) 205: { 206: position = match.getEndIndex(); 207: return true; 208: } 209: return false; 210: } 211: 212: /** 213: * @exception IllegalStateException If no match has yet been attempted, 214: * or if the previous match operation failed 215: */ 216: public String group () 217: { 218: assertMatchOp(); 219: return match.toString(); 220: } 221: 222: /** 223: * @param group The index of a capturing group in this matcher's pattern 224: * 225: * @exception IllegalStateException If no match has yet been attempted, 226: * or if the previous match operation failed 227: * @exception IndexOutOfBoundsException If the replacement string refers 228: * to a capturing group that does not exist in the pattern 229: */ 230: public String group (int group) 231: throws IllegalStateException 232: { 233: assertMatchOp(); 234: return match.toString(group); 235: } 236: 237: /** 238: * @param replacement The replacement string 239: */ 240: public String replaceFirst (String replacement) 241: { 242: reset(); 243: // Semantics might not quite match 244: return pattern.getRE().substitute(input, replacement, position, 245: RE.REG_REPLACE_USE_BACKSLASHESCAPE); 246: } 247: 248: /** 249: * @param replacement The replacement string 250: */ 251: public String replaceAll (String replacement) 252: { 253: reset(); 254: return pattern.getRE().substituteAll(input, replacement, position, 255: RE.REG_REPLACE_USE_BACKSLASHESCAPE); 256: } 257: 258: public int groupCount () 259: { 260: return pattern.getRE().getNumSubs(); 261: } 262: 263: public boolean lookingAt () 264: { 265: if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 266: match = pattern.getRE().getMatch(inputCharIndexed, regionStart, 267: anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); 268: else 269: match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, 270: anchoringBounds|RE.REG_FIX_STARTING_POSITION); 271: if (match != null) 272: { 273: if (match.getStartIndex() == 0) 274: { 275: position = match.getEndIndex(); 276: return true; 277: } 278: match = null; 279: } 280: return false; 281: } 282: 283: /** 284: * Attempts to match the entire input sequence against the pattern. 285: * 286: * If the match succeeds then more information can be obtained via the 287: * start, end, and group methods. 288: * 289: * @see #start() 290: * @see #end() 291: * @see #group() 292: */ 293: public boolean matches () 294: { 295: if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 296: match = pattern.getRE().getMatch(inputCharIndexed, regionStart, 297: anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); 298: else 299: match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, 300: anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION); 301: if (match != null) 302: { 303: if (match.getStartIndex() == 0) 304: { 305: position = match.getEndIndex(); 306: if (position == input.length()) 307: return true; 308: } 309: match = null; 310: } 311: return false; 312: } 313: 314: /** 315: * Returns the Pattern that is interpreted by this Matcher 316: */ 317: public Pattern pattern () 318: { 319: return pattern; 320: } 321: 322: /** 323: * Resets the internal state of the matcher, including 324: * resetting the region to its default state of encompassing 325: * the whole input. The state of {@link #hasTransparentBounds()} 326: * and {@link #hasAnchoringBounds()} are unaffected. 327: * 328: * @return a reference to this matcher. 329: * @see #regionStart() 330: * @see #regionEnd() 331: * @see #hasTransparentBounds() 332: * @see #hasAnchoringBounds() 333: */ 334: public Matcher reset () 335: { 336: position = 0; 337: match = null; 338: regionStart = 0; 339: regionEnd = input.length(); 340: appendPosition = 0; 341: return this; 342: } 343: 344: /** 345: * Resets the internal state of the matcher, including 346: * resetting the region to its default state of encompassing 347: * the whole input. The state of {@link #hasTransparentBounds()} 348: * and {@link #hasAnchoringBounds()} are unaffected. 349: * 350: * @param input The new input character sequence. 351: * @return a reference to this matcher. 352: * @see #regionStart() 353: * @see #regionEnd() 354: * @see #hasTransparentBounds() 355: * @see #hasAnchoringBounds() 356: */ 357: public Matcher reset (CharSequence input) 358: { 359: this.input = input; 360: this.inputCharIndexed = RE.makeCharIndexed(input, 0); 361: return reset(); 362: } 363: 364: /** 365: * @return the index of a capturing group in this matcher's pattern 366: * 367: * @exception IllegalStateException If no match has yet been attempted, 368: * or if the previous match operation failed 369: */ 370: public int start () 371: throws IllegalStateException 372: { 373: assertMatchOp(); 374: return match.getStartIndex(); 375: } 376: 377: /** 378: * @param group The index of a capturing group in this matcher's pattern 379: * 380: * @exception IllegalStateException If no match has yet been attempted, 381: * or if the previous match operation failed 382: * @exception IndexOutOfBoundsException If the replacement string refers 383: * to a capturing group that does not exist in the pattern 384: */ 385: public int start (int group) 386: throws IllegalStateException 387: { 388: assertMatchOp(); 389: return match.getStartIndex(group); 390: } 391: 392: /** 393: * @return True if and only if the matcher hit the end of input. 394: * @since 1.5 395: */ 396: public boolean hitEnd() 397: { 398: return inputCharIndexed.hitEnd(); 399: } 400: 401: /** 402: * @return A string expression of this matcher. 403: */ 404: public String toString() 405: { 406: CPStringBuilder sb = new CPStringBuilder(); 407: sb.append(this.getClass().getName()) 408: .append("[pattern=").append(pattern.pattern()) 409: .append(" region=").append(regionStart).append(",").append(regionEnd) 410: .append(" anchoringBounds=").append(anchoringBounds == 0) 411: .append(" transparentBounds=").append(transparentBounds) 412: .append(" lastmatch=").append(match == null ? "" : match.toString()) 413: .append("]"); 414: return sb.toString(); 415: } 416: 417: private void assertMatchOp() 418: { 419: if (match == null) throw new IllegalStateException(); 420: } 421: 422: /** 423: * <p> 424: * Defines the region of the input on which to match. 425: * By default, the {@link Matcher} attempts to match 426: * the whole string (from 0 to the length of the input), 427: * but a region between {@code start} (inclusive) and 428: * {@code end} (exclusive) on which to match may instead 429: * be defined using this method. 430: * </p> 431: * <p> 432: * The behaviour of region matching is further affected 433: * by the use of transparent or opaque bounds (see 434: * {@link #useTransparentBounds(boolean)}) and whether or not 435: * anchors ({@code ^} and {@code $}) are in use 436: * (see {@link #useAnchoringBounds(boolean)}). With transparent 437: * bounds, the matcher is aware of input outside the bounds 438: * set by this method, whereas, with opaque bounds (the default) 439: * only the input within the bounds is used. The use of 440: * anchors are affected by this setting; with transparent 441: * bounds, anchors will match the beginning of the real input, 442: * while with opaque bounds they match the beginning of the 443: * region. {@link #useAnchoringBounds(boolean)} can be used 444: * to turn on or off the matching of anchors. 445: * </p> 446: * 447: * @param start the start of the region (inclusive). 448: * @param end the end of the region (exclusive). 449: * @return a reference to this matcher. 450: * @throws IndexOutOfBoundsException if either {@code start} or 451: * {@code end} are less than zero, 452: * if either {@code start} or 453: * {@code end} are greater than the 454: * length of the input, or if 455: * {@code start} is greater than 456: * {@code end}. 457: * @see #regionStart() 458: * @see #regionEnd() 459: * @see #hasTransparentBounds() 460: * @see #useTransparentBounds(boolean) 461: * @see #hasAnchoringBounds() 462: * @see #useAnchoringBounds(boolean) 463: * @since 1.5 464: */ 465: public Matcher region(int start, int end) 466: { 467: int length = input.length(); 468: if (start < 0) 469: throw new IndexOutOfBoundsException("The start position was less than zero."); 470: if (start >= length) 471: throw new IndexOutOfBoundsException("The start position is after the end of the input."); 472: if (end < 0) 473: throw new IndexOutOfBoundsException("The end position was less than zero."); 474: if (end > length) 475: throw new IndexOutOfBoundsException("The end position is after the end of the input."); 476: if (start > end) 477: throw new IndexOutOfBoundsException("The start position is after the end position."); 478: reset(); 479: regionStart = start; 480: regionEnd = end; 481: return this; 482: } 483: 484: /** 485: * The start of the region on which to perform matches (inclusive). 486: * 487: * @return the start index of the region. 488: * @see #region(int,int) 489: * #see #regionEnd() 490: * @since 1.5 491: */ 492: public int regionStart() 493: { 494: return regionStart; 495: } 496: 497: /** 498: * The end of the region on which to perform matches (exclusive). 499: * 500: * @return the end index of the region. 501: * @see #region(int,int) 502: * @see #regionStart() 503: * @since 1.5 504: */ 505: public int regionEnd() 506: { 507: return regionEnd; 508: } 509: 510: /** 511: * Returns true if the bounds of the region marked by 512: * {@link #regionStart()} and {@link #regionEnd()} are 513: * transparent. When these bounds are transparent, the 514: * matching process can look beyond them to perform 515: * lookahead, lookbehind and boundary matching operations. 516: * By default, the bounds are opaque. 517: * 518: * @return true if the bounds of the matching region are 519: * transparent. 520: * @see #useTransparentBounds(boolean) 521: * @see #region(int,int) 522: * @see #regionStart() 523: * @see #regionEnd() 524: * @since 1.5 525: */ 526: public boolean hasTransparentBounds() 527: { 528: return transparentBounds; 529: } 530: 531: /** 532: * Sets the transparency of the bounds of the region 533: * marked by {@link #regionStart()} and {@link #regionEnd()}. 534: * A value of {@code true} makes the bounds transparent, 535: * so the matcher can see beyond them to perform lookahead, 536: * lookbehind and boundary matching operations. A value 537: * of {@code false} (the default) makes the bounds opaque, 538: * restricting the match to the input region denoted 539: * by {@link #regionStart()} and {@link #regionEnd()}. 540: * 541: * @param transparent true if the bounds should be transparent. 542: * @return a reference to this matcher. 543: * @see #hasTransparentBounds() 544: * @see #region(int,int) 545: * @see #regionStart() 546: * @see #regionEnd() 547: * @since 1.5 548: */ 549: public Matcher useTransparentBounds(boolean transparent) 550: { 551: transparentBounds = transparent; 552: return this; 553: } 554: 555: /** 556: * Returns true if the matcher will honour the use of 557: * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z}, 558: * {@code \z} and {@code $}. By default, the anchors 559: * are used. Note that the effect of the anchors is 560: * also affected by {@link #hasTransparentBounds()}. 561: * 562: * @return true if the matcher will attempt to match 563: * the anchoring bounds. 564: * @see #useAnchoringBounds(boolean) 565: * @see #hasTransparentBounds() 566: * @since 1.5 567: */ 568: public boolean hasAnchoringBounds() 569: { 570: return anchoringBounds == 0; 571: } 572: 573: /** 574: * Enables or disables the use of the anchoring bounds: 575: * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and 576: * {@code $}. By default, their use is enabled. When 577: * disabled, the matcher will not attempt to match 578: * the anchors. 579: * 580: * @param useAnchors true if anchoring bounds should be used. 581: * @return a reference to this matcher. 582: * @since 1.5 583: * @see #hasAnchoringBounds() 584: */ 585: public Matcher useAnchoringBounds(boolean useAnchors) 586: { 587: if (useAnchors) 588: anchoringBounds = 0; 589: else 590: anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL; 591: return this; 592: } 593: 594: /** 595: * Returns a read-only snapshot of the current state of 596: * the {@link Matcher} as a {@link MatchResult}. Any 597: * subsequent changes to this instance are not reflected 598: * in the returned {@link MatchResult}. 599: * 600: * @return a {@link MatchResult} instance representing the 601: * current state of the {@link Matcher}. 602: */ 603: public MatchResult toMatchResult() 604: { 605: Matcher snapshot = new Matcher(pattern, input); 606: if (match != null) 607: snapshot.match = (REMatch) match.clone(); 608: return snapshot; 609: } 610: 611: }