33import java .util .regex .Matcher ;
44import java .util .regex .Pattern ;
55
6+ import static java .lang .String .valueOf ;
7+
68public class VerbalExpression {
79
810 private final Pattern pattern ;
@@ -14,10 +16,34 @@ public static class Builder {
1416 private StringBuilder suffixes = new StringBuilder ();
1517 private int modifiers = Pattern .MULTILINE ;
1618
/**
 * Creates an empty builder. The constructor is package-private, so code
 * outside this package obtains a builder through
 * {@link VerbalExpression#regex()} rather than by calling it directly.
 *
 * @since 1.2
 */
Builder() {
    // nothing to initialise here
}
26+
27+ /**
28+ * Escapes any non-word char with two backslashes
29+ * used by any method, except {@link #add(String)}
30+ *
31+ * @param pValue - the string for char escaping
32+ * @return sanitized string value
33+ */
1734 private String sanitize (final String pValue ) {
1835 return pValue .replaceAll ("[\\ W]" , "\\ \\ $0" );
1936 }
2037
38+ /**
39+ * Counts occurrences of some substring in whole string
40+ * Same as org.apache.commons.lang3.StringUtils#countMatches(String, java.lang.String)
41+ * by effect. Used to count braces for {@link #or(String)} method
42+ *
43+ * @param where - where to find
44+ * @param what - what needs to count matches
45+ * @return 0 if nothing found, count of occurrences instead
46+ */
2147 private int countOccurrencesOf (String where , String what ) {
2248 return (where .length () - where .replace (what , "" ).length ()) / what .length ();
2349 }
@@ -40,11 +66,22 @@ public VerbalExpression build() {
4066 * @param pValue - literal expression, not sanitized
4167 * @return this builder
4268 */
43- public Builder add (String pValue ) {
69+ public Builder add (final String pValue ) {
4470 this .source .append (pValue );
4571 return this ;
4672 }
4773
74+ /**
75+ * Append a regex from builder and wrap it with unnamed group (?: ... )
76+ *
77+ * @param regex - VerbalExpression.Builder, that not changed
78+ * @return this builder
79+ * @since 1.2
80+ */
81+ public Builder add (final Builder regex ) {
82+ return this .group ().add (regex .build ().toString ()).endGr ();
83+ }
84+
4885 /**
4986 * Enable or disable the expression to start at the beginning of the line
5087 *
@@ -147,7 +184,7 @@ public Builder anything() {
147184 * Add expression that matches anything, but not passed argument
148185 *
149186 * @param pValue - the string not to match
150- * @return
187+ * @return this builder
151188 */
152189 public Builder anythingButNot (final String pValue ) {
153190 return this .add ("(?:[^" + sanitize (pValue ) + "]*)" );
@@ -273,6 +310,12 @@ public Builder anyOf(final String pValue) {
273310 return this ;
274311 }
275312
313+ /**
314+ * Shortcut to {@link #anyOf(String)}
315+ *
316+ * @param value - CharSequence every char from can be matched
317+ * @return this builder
318+ */
276319 public Builder any (final String value ) {
277320 return this .anyOf (value );
278321 }
@@ -368,6 +411,15 @@ public Builder withAnyCase(final boolean pEnable) {
368411 return this ;
369412 }
370413
414+ /**
415+ * Turn ON matching with ignoring case
416+ * Example:
417+ * // matches "a"
418+ * // matches "A"
419+ * regex().find("a").withAnyCase()
420+ *
421+ * @return this builder
422+ */
371423 public Builder withAnyCase () {
372424 return withAnyCase (true );
373425 }
@@ -381,16 +433,59 @@ public Builder searchOneLine(final boolean pEnable) {
381433 return this ;
382434 }
383435
384- public Builder multiple (final String pValue ) {
385- switch (pValue .charAt (0 )) {
386- case '*' :
387- case '+' :
388- return this .add (pValue );
436+ /**
437+ * Convenient method to show that string usage count is exact count, range count or simply one or more
438+ * Usage:
439+ * regex().multiply("abc") // Produce (?:abc)+
440+ * regex().multiply("abc", null) // Produce (?:abc)+
441+ * regex().multiply("abc", (int)from) // Produce (?:abc){from}
442+ * regex().multiply("abc", (int)from, (int)to) // Produce (?:abc){from, to}
443+ * regex().multiply("abc", (int)from, (int)to, (int)...) // Produce (?:abc)+
444+ *
445+ * @param pValue - the string to be looked for
446+ * @param count - (optional) if passed one or two numbers, it used to show count or range count
447+ * @return this builder
448+ * @see #oneOrMore()
449+ * @see #then(String)
450+ * @see #zeroOrMore()
451+ */
452+ public Builder multiple (final String pValue , final int ... count ) {
453+ if (count == null ) {
454+ return this .then (pValue ).oneOrMore ();
455+ }
456+ switch (count .length ) {
457+ case 1 :
458+ return this .then (pValue ).count (count [0 ]);
459+ case 2 :
460+ return this .then (pValue ).count (count [0 ], count [1 ]);
389461 default :
390- return this .add ( this . sanitize ( pValue ) + '+' );
462+ return this .then ( pValue ). oneOrMore ( );
391463 }
392464 }
393465
466+ /**
467+ * Adds "+" char to regexp
468+ * Same effect as {@link #atLeast(int)} with "1" argument
469+ * Also, used by {@link #multiple(String, int...)} when second argument is null, or have length more than 2
470+ *
471+ * @return this builder
472+ * @since 1.2
473+ */
474+ public Builder oneOrMore () {
475+ return this .add ("+" );
476+ }
477+
478+ /**
479+ * Adds "*" char to regexp, means zero or more times repeated
480+ * Same effect as {@link #atLeast(int)} with "0" argument
481+ *
482+ * @return this builder
483+ * @since 1.2
484+ */
485+ public Builder zeroOrMore () {
486+ return this .add ("*" );
487+ }
488+
394489 /**
395490 * Add count of previous group
396491 * for example:
@@ -419,6 +514,22 @@ public Builder count(final int from, final int to) {
419514 return this ;
420515 }
421516
517+ /**
518+ * Produce range count with only minimal number of occurrences
519+ * for example:
520+ * .find("w").atLeast(1) // produce (?:w){1,}
521+ *
522+ * @param from - minimal number of occurrences
523+ * @return this Builder
524+ * @see #count(int)
525+ * @see #oneOrMore()
526+ * @see #zeroOrMore()
527+ * @since 1.2
528+ */
529+ public Builder atLeast (final int from ) {
530+ return this .add ("{" ).add (valueOf (from )).add (",}" );
531+ }
532+
422533 /**
423534 * Add a alternative expression to be matched
424535 *
@@ -452,6 +563,34 @@ public Builder capture() {
452563 return this .add ("(" );
453564 }
454565
566+ /**
567+ * Shortcut for {@link #capture()}
568+ *
569+ * @return this builder
570+ * @since 1.2
571+ */
572+ public Builder capt () {
573+ return this .capture ();
574+ }
575+
576+ /**
577+ * Same as {@link #capture()}, but don't save result
578+ * May be used to set count of duplicated captures, without creating a new saved capture
579+ * Example:
580+ * // Without group() - count(2) applies only to second capture
581+ * regex().group()
582+ * .capt().range("0", "1").endCapt().tab()
583+ * .capt().digit().count(5).endCapt()
584+ * .endGr().count(2);
585+ *
586+ * @return this builder
587+ * @since 1.2
588+ */
589+ public Builder group () {
590+ this .suffixes .append (")" );
591+ return this .add ("(?:" );
592+ }
593+
455594 /**
456595 * Close brace for previous capture and remove last closed brace from suffixes
457596 * Can be used to continue build regex after capture or to add multiply captures
@@ -463,12 +602,41 @@ public Builder endCapture() {
463602 this .suffixes .setLength (suffixes .length () - 1 );
464603 return this .add (")" );
465604 } else {
466- throw new IllegalStateException ("Can't end capture when it not started" );
605+ throw new IllegalStateException ("Can't end capture (group) when it not started" );
467606 }
468607 }
469- }
470608
609+ /**
610+ * Shortcut for {@link #endCapture()}
611+ *
612+ * @return this builder
613+ * @since 1.2
614+ */
615+ public Builder endCapt () {
616+ return this .endCapture ();
617+ }
471618
619+ /**
620+ * Closes current unnamed and unmatching group
621+ * Shortcut for {@link #endCapture()}
622+ * Use it with {@link #group()} for prettify code
623+ * Example:
624+ * regex().group().maybe("word").count(2).endGr()
625+ *
626+ * @return this builder
627+ * @since 1.2
628+ */
629+ public Builder endGr () {
630+ return this .endCapture ();
631+ }
632+ }
633+
/**
 * Stores the given compiled pattern. The constructor is private; use the
 * builder from {@link #regex()} (or {@link #regex(Builder)}) to create a
 * new instance of VerbalExpression.
 *
 * @param pattern - the {@link java.util.regex.Pattern} this expression wraps
 */
private VerbalExpression(final Pattern pattern) {
    this.pattern = pattern;
}
@@ -518,6 +686,7 @@ public String getText(final String toTest) {
518686 * @param toTest - string to extract from
519687 * @param group - group to extract
520688 * @return extracted group
689+ * @since 1.1
521690 */
522691 public String getText (final String toTest , final int group ) {
523692 Matcher m = pattern .matcher (toTest );
@@ -539,6 +708,7 @@ public String toString() {
539708 *
540709 * @param pBuilder - instance to clone
541710 * @return new VerbalExpression.Builder copied from passed
711+ * @since 1.1
542712 */
543713 public static Builder regex (final Builder pBuilder ) {
544714 Builder builder = new Builder ();
@@ -555,6 +725,7 @@ public static Builder regex(final Builder pBuilder) {
555725 * Creates new instance of VerbalExpression builder
556726 *
557727 * @return new VerbalExpression.Builder
728+ * @since 1.1
558729 */
559730 public static Builder regex () {
560731 return new Builder ();
0 commit comments