| 1 | /*************************************************************************** |
|---|
| 2 | * toolsRegexp.cc |
|---|
| 3 | * |
|---|
| 4 | * Thu Sep 20 2005 |
|---|
| 5 | * Copyright 2005 Dmytro Milinevskyy |
|---|
| 6 | * milinevskyy@gmail.com |
|---|
| 7 | ****************************************************************************/ |
|---|
| 8 | |
|---|
| 9 | /* |
|---|
| 10 | * This program is free software; you can redistribute it and/or modify |
|---|
| 11 | * it under the terms of the GNU Lesser General Public License version 2.1 as published by |
|---|
| 12 | * the Free Software Foundation; |
|---|
| 13 | * |
|---|
| 14 | * This program is distributed in the hope that it will be useful, |
|---|
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 17 | * GNU Library General Public License for more details. |
|---|
| 18 | * |
|---|
| 19 | * You should have received a copy of the GNU Lesser General Public License |
|---|
| 20 | * along with this program; if not, write to the Free Software |
|---|
| 21 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|---|
| 22 | */ |
|---|
| 23 | |
|---|
| 24 | /** |
|---|
| 25 | * vim indentation settings |
|---|
| 26 | * set tabstop=4 |
|---|
| 27 | * set shiftwidth=4 |
|---|
| 28 | */ |
|---|
| 29 | |
|---|
| 30 | #include <libdodo/directives.h> |
|---|
| 31 | |
|---|
| 32 | #ifdef PCRE_EXT |
|---|
| 33 | #include <pcre.h> |
|---|
| 34 | #else |
|---|
| 35 | #include <sys/types.h> |
|---|
| 36 | #include <regex.h> |
|---|
| 37 | #endif |
|---|
| 38 | |
|---|
namespace dodo {
    namespace tools {
        /**
         * @struct __regexp__
         * @brief defines regular expression internal handles
         * @note a freshly created handle is always in the "no pattern compiled" state
         */
        struct __regexp__ {
#ifdef PCRE_EXT
            /**
             * constructor
             * @note starts without a compiled pattern
             */
            __regexp__() : code(NULL)
            {
            }

            pcre *code;       ///< compiled pattern; NULL while nothing is compiled
#else
            /**
             * constructor
             * @note starts without a compiled pattern; `code` is only meaningful once notCompiled is false
             */
            __regexp__() : notCompiled(true)
            {
            }

            regex_t code;     ///< compiled pattern
            bool notCompiled; ///< true if not compiled
#endif
        };
    }
}
|---|
| 55 | |
|---|
| 56 | #include <libdodo/toolsRegexp.h> |
|---|
| 57 | #include <libdodo/types.h> |
|---|
| 58 | #include <libdodo/toolsRegexpEx.h> |
|---|
| 59 | |
|---|
| 60 | using namespace dodo::tools; |
|---|
| 61 | |
|---|
| 62 | regexp::regexp(regexp &) |
|---|
| 63 | { |
|---|
| 64 | } |
|---|
| 65 | |
|---|
| 66 | //------------------------------------------------------------------- |
|---|
| 67 | |
|---|
| 68 | regexp::regexp() : extended(true), |
|---|
| 69 | icase(false), |
|---|
| 70 | greedy(true), |
|---|
| 71 | multiline(false), |
|---|
| 72 | regex(new __regexp__) |
|---|
| 73 | { |
|---|
| 74 | #ifdef PCRE_EXT |
|---|
| 75 | #else |
|---|
| 76 | regex->notCompiled = true; |
|---|
| 77 | #endif |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | //------------------------------------------------------------------- |
|---|
| 81 | |
|---|
| 82 | regexp::regexp(const dodo::string &pattern) : extended(true), |
|---|
| 83 | icase(false), |
|---|
| 84 | greedy(true), |
|---|
| 85 | multiline(false), |
|---|
| 86 | regex(new __regexp__) |
|---|
| 87 | { |
|---|
| 88 | #ifdef PCRE_EXT |
|---|
| 89 | #else |
|---|
| 90 | regex->notCompiled = true; |
|---|
| 91 | #endif |
|---|
| 92 | |
|---|
| 93 | compile(pattern); |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | //------------------------------------------------------------------- |
|---|
| 97 | |
|---|
| 98 | regexp::~regexp() |
|---|
| 99 | { |
|---|
| 100 | #ifdef PCRE_EXT |
|---|
| 101 | #else |
|---|
| 102 | if (!regex->notCompiled) |
|---|
| 103 | regfree(®ex->code); |
|---|
| 104 | |
|---|
| 105 | #endif |
|---|
| 106 | |
|---|
| 107 | delete regex; |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | //------------------------------------------------------------------- |
|---|
| 111 | |
|---|
| 112 | bool |
|---|
| 113 | regexp::match(const dodo::string &pattern, |
|---|
| 114 | const dodo::string &sample, |
|---|
| 115 | dodoStringArray &pockets) |
|---|
| 116 | { |
|---|
| 117 | dodo_try { |
|---|
| 118 | compile(pattern); |
|---|
| 119 | } dodo_catch (exception::basic *e UNUSED) { |
|---|
| 120 | if (e->function == REGEXPEX_COMPILE) |
|---|
| 121 | return false; |
|---|
| 122 | else |
|---|
| 123 | dodo_rethrow; |
|---|
| 124 | } |
|---|
| 125 | |
|---|
| 126 | return match(sample, pockets); |
|---|
| 127 | } |
|---|
| 128 | |
|---|
| 129 | //------------------------------------------------------------------- |
|---|
| 130 | |
|---|
| 131 | bool |
|---|
| 132 | regexp::match(const dodo::string &sample, |
|---|
| 133 | dodoStringArray &pockets) |
|---|
| 134 | { |
|---|
| 135 | pockets.clear(); |
|---|
| 136 | if (!boundMatch(sample)) |
|---|
| 137 | return false; |
|---|
| 138 | |
|---|
| 139 | pockets.reserve(boundaries.size()); |
|---|
| 140 | |
|---|
| 141 | dodoArray<__regex_match__>::const_iterator i(boundaries.begin()), j(boundaries.end()); |
|---|
| 142 | for (; i != j; ++i) |
|---|
| 143 | pockets.push_back(dodo::string(sample.data() + i->begin, i->end - i->begin)); |
|---|
| 144 | |
|---|
| 145 | return true; |
|---|
| 146 | } |
|---|
| 147 | |
|---|
| 148 | //------------------------------------------------------------------- |
|---|
| 149 | |
|---|
| 150 | bool |
|---|
| 151 | regexp::boundMatch(const dodo::string &sample) |
|---|
| 152 | { |
|---|
| 153 | boundaries.clear(); |
|---|
| 154 | |
|---|
| 155 | #ifdef PCRE_EXT |
|---|
| 156 | int subs; |
|---|
| 157 | |
|---|
| 158 | if (pcre_fullinfo(regex->code, NULL, PCRE_INFO_CAPTURECOUNT, &subs) != 0) |
|---|
| 159 | return false; |
|---|
| 160 | |
|---|
| 161 | subs *= 3; |
|---|
| 162 | subs += 3; |
|---|
| 163 | |
|---|
| 164 | int *oVector = new int[subs]; |
|---|
| 165 | int rc = pcre_exec(regex->code, NULL, sample.data(), sample.size(), 0, 0, oVector, subs); |
|---|
| 166 | if (rc <= 0) { |
|---|
| 167 | delete [] oVector; |
|---|
| 168 | |
|---|
| 169 | return false; |
|---|
| 170 | } |
|---|
| 171 | |
|---|
| 172 | __regex_match__ bound; |
|---|
| 173 | |
|---|
| 174 | for (int j = 1; j < rc; ++j) { |
|---|
| 175 | subs = j * 2; |
|---|
| 176 | bound.begin = oVector[subs]; |
|---|
| 177 | bound.end = oVector[subs + 1]; |
|---|
| 178 | boundaries.push_back(bound); |
|---|
| 179 | } |
|---|
| 180 | |
|---|
| 181 | delete [] oVector; |
|---|
| 182 | |
|---|
| 183 | return true; |
|---|
| 184 | #else |
|---|
| 185 | int subs = regex->code.re_nsub + 1; |
|---|
| 186 | regmatch_t *pmatch = new regmatch_t[subs]; |
|---|
| 187 | |
|---|
| 188 | int res = regexec(®ex->code, sample.data(), subs, pmatch, 0); |
|---|
| 189 | if (res != 0) { |
|---|
| 190 | delete [] pmatch; |
|---|
| 191 | return false; |
|---|
| 192 | } |
|---|
| 193 | |
|---|
| 194 | __regex_match__ bound; |
|---|
| 195 | |
|---|
| 196 | for (int i(1); i < subs; ++i) { |
|---|
| 197 | bound.begin = pmatch[i].rm_so; |
|---|
| 198 | bound.end = pmatch[i].rm_eo; |
|---|
| 199 | boundaries.push_back(bound); |
|---|
| 200 | } |
|---|
| 201 | |
|---|
| 202 | delete [] pmatch; |
|---|
| 203 | |
|---|
| 204 | return true; |
|---|
| 205 | #endif |
|---|
| 206 | } |
|---|
| 207 | |
|---|
| 208 | //------------------------------------------------------------------- |
|---|
| 209 | |
|---|
| 210 | void |
|---|
| 211 | regexp::compile(const dodo::string &pattern) |
|---|
| 212 | { |
|---|
| 213 | int bits(0); |
|---|
| 214 | |
|---|
| 215 | #ifdef PCRE_EXT |
|---|
| 216 | if (icase) |
|---|
| 217 | bits |= PCRE_CASELESS; |
|---|
| 218 | if (!greedy) |
|---|
| 219 | bits |= PCRE_UNGREEDY; |
|---|
| 220 | if (multiline) |
|---|
| 221 | bits |= PCRE_MULTILINE; |
|---|
| 222 | bits |= PCRE_DOTALL; |
|---|
| 223 | |
|---|
| 224 | int errOffset(0), errn(0); |
|---|
| 225 | const char *error; |
|---|
| 226 | regex->code = pcre_compile2(pattern.data(), bits, &errn, &error, &errOffset, NULL); |
|---|
| 227 | if (regex->code == NULL) |
|---|
| 228 | dodo_throw exception::basic(exception::MODULE_TOOLSREGEXP, REGEXPEX_COMPILE, exception::ERRNO_PCRE, errn, error, __LINE__, __FILE__, pattern); |
|---|
| 229 | |
|---|
| 230 | #else |
|---|
| 231 | if (extended) |
|---|
| 232 | bits |= REG_EXTENDED; |
|---|
| 233 | if (icase) |
|---|
| 234 | bits |= REG_ICASE; |
|---|
| 235 | |
|---|
| 236 | if (regex->notCompiled) |
|---|
| 237 | regex->notCompiled = false; |
|---|
| 238 | else |
|---|
| 239 | regfree(®ex->code); |
|---|
| 240 | |
|---|
| 241 | int errn = regcomp(®ex->code, pattern.data(), bits); |
|---|
| 242 | if (errn != 0) { |
|---|
| 243 | #define ERROR_LEN 256 |
|---|
| 244 | char error[ERROR_LEN]; |
|---|
| 245 | regerror(errn, ®ex->code, error, ERROR_LEN); |
|---|
| 246 | dodo_throw exception::basic(exception::MODULE_TOOLSREGEXP, REGEXPEX_COMPILE, exception::ERRNO_POSIXREGEX, errn, error, __LINE__, __FILE__, pattern); |
|---|
| 247 | } |
|---|
| 248 | #endif |
|---|
| 249 | } |
|---|
| 250 | |
|---|
| 251 | //------------------------------------------------------------------- |
|---|
| 252 | |
|---|
| 253 | dodo::string |
|---|
| 254 | regexp::replace(const dodo::string &pattern, |
|---|
| 255 | const dodo::string &sample, |
|---|
| 256 | const dodoStringArray &replacements) |
|---|
| 257 | { |
|---|
| 258 | dodo_try { |
|---|
| 259 | compile(pattern); |
|---|
| 260 | } dodo_catch (exception::basic *e UNUSED) { |
|---|
| 261 | if (e->function == REGEXPEX_COMPILE) |
|---|
| 262 | return sample; |
|---|
| 263 | else |
|---|
| 264 | dodo_rethrow; |
|---|
| 265 | } |
|---|
| 266 | |
|---|
| 267 | return replace(sample, replacements); |
|---|
| 268 | } |
|---|
| 269 | |
|---|
| 270 | //------------------------------------------------------------------- |
|---|
| 271 | |
|---|
| 272 | dodo::string |
|---|
| 273 | regexp::replace(const dodo::string &sample, |
|---|
| 274 | const dodoStringArray &replacements) |
|---|
| 275 | { |
|---|
| 276 | if (!boundMatch(sample)) |
|---|
| 277 | return sample; |
|---|
| 278 | |
|---|
| 279 | dodoArray<__regex_match__>::const_iterator i(boundaries.begin()), j(boundaries.end()), o; |
|---|
| 280 | |
|---|
| 281 | dodoStringArray::const_iterator k(replacements.begin()); |
|---|
| 282 | int subs = replacements.size(); |
|---|
| 283 | |
|---|
| 284 | dodo::string temp = sample; |
|---|
| 285 | |
|---|
| 286 | long shift = 0; |
|---|
| 287 | unsigned long begin(0), end(0); |
|---|
| 288 | |
|---|
| 289 | for (int res = 0; res < subs && i != j; ++i, ++res, ++k) { |
|---|
| 290 | if (res > 0) { |
|---|
| 291 | o = i - 1; |
|---|
| 292 | shift = o->end - o->begin - (k - 1)->size(); |
|---|
| 293 | } |
|---|
| 294 | begin = i->begin - shift; |
|---|
| 295 | end = i->end - shift; |
|---|
| 296 | |
|---|
| 297 | temp.replace(begin, end - begin, *k); |
|---|
| 298 | } |
|---|
| 299 | |
|---|
| 300 | return temp; |
|---|
| 301 | } |
|---|
| 302 | |
|---|
| 303 | //------------------------------------------------------------------- |
|---|
| 304 | |
|---|