plis_regexp.h

00001 /*
00002  * version 1.6
00003  * Regexp is a class that encapsulates the Regular expression
00004  * stuff. Hopefully this means I can plug in different regexp
00005  * libraries without the rest of my code needing to be changed.
00006  * Written by Jim Morris,  jegm@sgi.com
00007  *
00008  * Ported to encapsulate PCRE by Dov Grobgeld 2001. The original licence
00009  * seems to allow any use whatsoever of the code.
00010  */
00011 #ifndef _REGEXP_H
00012 #define _REGEXP_H
#include <assert.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
#include <string>

#include <pcre.h>
00020 
00024 namespace plis
00025 {
/**
 * A pair of integer offsets describing an inclusive span [start, end].
 *
 * A default-constructed Range holds (0, -1), which makes length() zero.
 */
class Range {
public:
  /// Creates the zero-length span (start 0, end -1).
  Range() : first_(0), last_(-1) {}

  /// Creates the span running from s to e, both ends included.
  Range(int s, int e) : first_(s), last_(e) {}

  /// Offset of the first element of the span.
  int start(void) const { return first_; }

  /// Offset of the last element of the span (inclusive).
  int end(void) const { return last_; }

  /// Number of elements covered; the +1 accounts for the inclusive end.
  int length(void) const { return last_ - first_ + 1; }

private:
  int first_;
  int last_;
};
00045   
00049   class Regexp
00050     {
00051     private:
00052       pcre *repat;
00053       const char *target; // only used as a base address to get an offset
00054       int res;
00055       int iflg;
00056       int ovector[30];
00057       int number_of_substrings;
00058       
00059     public:
00063       Regexp(const char *rege, const char *flag_string = "")
00064         {
00065           int flags = chars_to_flags(flag_string);
00066           const char *error;
00067           int error_offset;
00068           
00069           repat = pcre_compile(rege, flags, &error, &error_offset, NULL);
00070           if (repat == NULL)
00071             {
00072               std::cerr << "pcre_compile() failed!\n";
00073               return ;
00074             }
00075           
00076         }
00077       
00078       ~Regexp()
00079         {
00080           free((char *)repat);
00081         }    
00082       
00084       int match(const std::string& targ)
00085         {
00086           int result;
00087           const char *subject = targ.c_str();  // Shortcut
00088           
00089           result= pcre_exec(repat,             /* result of pcre_compile() */
00090                             NULL,              /* we didn't study the pattern */
00091                             subject,           /* the subject string */
00092                             targ.size(),       /* the length of the subject string */
00093                             0,                 /* start at offset 0 in the subject */
00094                             0,                 /* default options */
00095                             ovector,           /* vector for substring information */
00096                             30);               /* number of elements in the vector */
00097           
00098           if (result > 0)
00099             {
00100               number_of_substrings = result;
00101             }
00102           else
00103             number_of_substrings = 0;
00104           
00105           return ((result >= 0) ? 1 : 0);
00106         }
00107       
00109       int groups()
00110         {
00111           return number_of_substrings;
00112         }
00113       
00115       Range getgroup(int n) const
00116         {
00117           // assert(n < number_of_substrings);
00118           
00119           return Range(ovector[2*n],
00120                        ovector[2*n+1]-1);
00121         }
00122       
00123     private:
00124       // Currently only recognizes the perl "i" flag for case
00125       // independant matching.
00126       int chars_to_flags(const std::string& flag_string)
00127         {
00128           int flags = 0;
00129           if (flag_string.find("i") != std::string::npos)
00130             flags |= PCRE_CASELESS;
00131           return flags;
00132         }
00133       
00134     };
00135 };
00136 #endif

plis - Perl Like Structures