因為項目需求,現在需要在 C# 中使用 Python 的正則表達式,最開始採用的方法是 IronPython。
但是它在 iOS 上面無法通過編譯。好在 IronPython 是一個開源項目,於是拿到源碼開始研究。
因為我需要的功能很簡單,所以最後的代碼也只是判斷傳入的字符串是否匹配我傳入的 Python 正則表達式。
解析部分的代碼直接沿用 IronPython 的實現:
/* ****************************************************************************
 *
 * Copyright (c) Microsoft Corporation.
 *
 * This source code is subject to terms and conditions of the Microsoft Public
 * License. A copy of the license can be found in the License.html file at the
 * root of this distribution. If you cannot locate the Microsoft Public
 * License, please send an email to [email protected]. By using this source
 * code in any fashion, you are agreeing to be bound by the terms of the
 * Microsoft Public License.
 *
 * You must not remove this notice, or any other, from this software.
 *
 * ***************************************************************************/

using System;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Translates Python-style regular expressions into .NET regular expressions
/// and tests strings against them.
/// By ZeaLotSean
/// </summary>
public static class PythonRegex {
    #region CONSTANTS

    // short forms
    //public static object I = 0x02;
    //public static object L = 0x04;
    //public static object M = 0x08;
    //public static object S = 0x10;
    //public static object U = 0x20;
    //public static object X = 0x40;

    // long forms (same numeric values as the flags of Python's re module)
    public static object IGNORECASE = 0x02;
    public static object LOCALE = 0x04;
    public static object MULTILINE = 0x08;
    public static object DOTALL = 0x10;
    public static object UNICODE = 0x20;
    public static object VERBOSE = 0x40;

    #endregion

    /// <summary>
    /// Compiles a Python regular expression with no flags and with
    /// <see cref="RegexOptions.Compiled"/> enabled.
    /// </summary>
    /// <param name="_pattern">The Python regular expression.</param>
    /// <returns>A reusable pattern object.</returns>
    /// <exception cref="ArgumentException">The pattern is null, malformed or uses unsupported syntax.</exception>
    public static Python_Pattern Compile(string _pattern) {
        return new Python_Pattern(_pattern, 0, true);
    }

    /// <summary>
    /// Tests whether <paramref name="_str"/> contains a match for the Python
    /// regular expression <paramref name="_pattern"/>, using no flags.
    /// </summary>
    public static bool isMatched(string _pattern, string _str) {
        return isMatched(_pattern, _str, 0);
    }

    /// <summary>
    /// Tests whether <paramref name="_str"/> contains a match for the Python
    /// regular expression <paramref name="_pattern"/>.
    /// </summary>
    /// <param name="_pattern">The Python regular expression.</param>
    /// <param name="_str">The text to search.</param>
    /// <param name="flags">Bitwise combination of the flag constants of this class.</param>
    /// <returns>true when a match is found anywhere in <paramref name="_str"/>.</returns>
    /// <exception cref="ArgumentException">The pattern is null, malformed or uses unsupported syntax.</exception>
    public static bool isMatched(string _pattern, string _str, int flags) {
        return new Python_Pattern(_pattern, flags).isMatched(_str);
    }

    /// <summary>
    /// Compiled reg-ex pattern
    /// </summary>
    public class Python_Pattern {
        internal Regex m_re;
        internal ParsedRegex m_pre;

        public Python_Pattern(string pattern)
            : this(pattern, 0) {
        }

        public Python_Pattern(string pattern, int flags)
            : this(pattern, flags, false) {
        }

        /// <summary>
        /// Translates <paramref name="pattern"/> to .NET syntax and builds the underlying <see cref="Regex"/>.
        /// </summary>
        /// <param name="pattern">The Python regular expression.</param>
        /// <param name="flags">Bitwise combination of the flag constants of <see cref="PythonRegex"/>.</param>
        /// <param name="compiled">When true, <see cref="RegexOptions.Compiled"/> is added to the options.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
        /// <exception cref="ArgumentException">The pattern is malformed or uses unsupported syntax.</exception>
        public Python_Pattern(string pattern, int flags, bool compiled) {
            if (pattern == null) {
                throw new ArgumentNullException("pattern");
            }

            m_pre = PreParseRegex(pattern);
            if (m_pre == null) {
                // PreParseRegex signals "cannot translate" with null; report it with the same
                // exception family Regex uses for bad patterns instead of dereferencing null.
                throw new ArgumentException(
                    "Unsupported or malformed Python regular expression: " + pattern, "pattern");
            }

            RegexOptions opts = FlagsToOption(flags);
            if (compiled) {
                opts |= RegexOptions.Compiled;
            }
            this.m_re = new Regex(m_pre.Pattern, opts);
        }

        /// <summary>
        /// Tests whether the given string contains a match for this pattern.
        /// </summary>
        /// <param name="_str">The text to search.</param>
        /// <returns>true when a match is found anywhere in <paramref name="_str"/>.</returns>
        public bool isMatched(string _str) {
            return m_re.IsMatch(_str);
        }

        /// <summary>
        /// The pattern text exactly as the caller supplied it (before translation).
        /// </summary>
        public string Pattern {
            get {
                return m_pre.UserPattern;
            }
        }
    }

    /// <summary>
    /// Maps the integer flag bits of this class onto <see cref="RegexOptions"/>.
    /// UNICODE has no mapping and is ignored.
    /// </summary>
    private static RegexOptions FlagsToOption(int flags) {
        RegexOptions opts = RegexOptions.None;
        if ((flags & (int)IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
        if ((flags & (int)MULTILINE) != 0) opts |= RegexOptions.Multiline;
        // CultureInvariant is never set on opts above, so this clears nothing;
        // the statement is retained unchanged from the original code.
        if (((flags & (int)LOCALE)) == 0) opts &= (~RegexOptions.CultureInvariant);
        if ((flags & (int)DOTALL) != 0) opts |= RegexOptions.Singleline;
        if ((flags & (int)VERBOSE) != 0) opts |= RegexOptions.IgnorePatternWhitespace;

        return opts;
    }

    /// <summary>
    /// Result of translating a Python pattern: the original text, the .NET
    /// text, and the options collected from inline flags.
    /// </summary>
    internal class ParsedRegex {
        public ParsedRegex(string pattern) {
            this.UserPattern = pattern;
        }

        public string UserPattern;
        public string Pattern;
        public RegexOptions Options = RegexOptions.CultureInvariant;
    }

    /// <summary>
    /// Preparses a regular expression text returning a ParsedRegex class
    /// that can be used for further regular expressions.
    /// Takes a Python regular expression and rewrites it into a form the
    /// .NET regex engine accepts.
    /// </summary>
    /// <param name="pattern">The Python regular expression; must not be null.</param>
    /// <returns>
    /// The translated pattern, or null when the pattern ends right after "(?",
    /// has an unterminated "(?P=name", or uses an unsupported "(?" extension.
    /// </returns>
    private static ParsedRegex PreParseRegex(string pattern) {
        ParsedRegex res = new ParsedRegex(pattern);

        int cur = 0, nameIndex;
        bool containsNamedGroup = false;

        // Pass 1: rewrite group syntax. Note that '(' inside a character class
        // is not special-cased here; every unescaped '(' is treated as a group start.
        for (; ; ) {
            nameIndex = pattern.IndexOf('(', cur);
            if (nameIndex > 0 && pattern[nameIndex - 1] == '\\') {
                int curIndex = nameIndex - 2;
                int backslashCount = 1;
                while (curIndex >= 0 && pattern[curIndex] == '\\') {
                    backslashCount++;
                    curIndex--;
                }
                // odd number of back slashes, this is an escaped
                // paren that we should ignore.
                if ((backslashCount & 0x01) != 0) {
                    // Jump straight past the escaped paren instead of re-finding it
                    // one index at a time.
                    cur = nameIndex + 1;
                    continue;
                }
            }

            if (nameIndex == -1) break;
            if (nameIndex == pattern.Length - 1) break;

            switch (pattern[++nameIndex]) {
                case '?':
                    // extension syntax
                    if (nameIndex == pattern.Length - 1) {
                        return null;
                    }
                    switch (pattern[++nameIndex]) {
                        case 'P':
                            // named regex, .NET doesn't expect the P so we'll remove it;
                            // also, once we see a named group i.e. ?P then we need to start artificially
                            // naming all unnamed groups from then on---this is to get around the fact that
                            // the CLR RegEx support orders all the unnamed groups before all the named
                            // groups, even if the named groups are before the unnamed ones in the pattern;
                            // the artificial naming preserves the order of the groups and thus the order of
                            // the matches
                            if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=') {
                                // match whatever was previously matched by the named group

                                // remove the (?P=
                                pattern = pattern.Remove(nameIndex - 2, 4);
                                pattern = pattern.Insert(nameIndex - 2, "\\k<");
                                int tmpIndex = nameIndex;
                                while (tmpIndex < pattern.Length && pattern[tmpIndex] != ')')
                                    tmpIndex++;

                                if (tmpIndex == pattern.Length) {
                                    return null;
                                }

                                // replace the closing ')' with '>' to finish \k<name>
                                pattern = pattern.Substring(0, tmpIndex) + ">" + pattern.Substring(tmpIndex + 1);
                            } else {
                                containsNamedGroup = true;
                                pattern = pattern.Remove(nameIndex, 1);
                            }
                            break;
                        case 'i': res.Options |= RegexOptions.IgnoreCase; break;
                        case 'L': res.Options &= ~(RegexOptions.CultureInvariant); break;
                        case 'm': res.Options |= RegexOptions.Multiline; break;
                        case 's': res.Options |= RegexOptions.Singleline; break;
                        case 'u': break;
                        case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
                        case ':': break; // non-capturing
                        case '=': break; // look ahead assertion
                        case '<': break; // positive look behind assertion
                        case '!': break; // negative look ahead assertion
                        case '#': break; // inline comment
                        case '(':  // yes/no if group exists, we don't support this
                        default: {
                                return null;
                            }
                    }
                    break;
                default:
                    // just another group
                    if (containsNamedGroup) {
                        // need to name this unnamed group
                        pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
                    }
                    break;
            }

            cur = nameIndex;
        }

        // Pass 2: drop backslashes in front of word characters that are not
        // escape sequences, walking the pattern one escape pair at a time.
        cur = 0;
        for (; ; ) {
            int slashIndex = pattern.IndexOf('\\', cur);
            if (slashIndex == -1 || slashIndex == pattern.Length - 1) break;

            char curChar = pattern[slashIndex + 1];
            // By default resume right after the two-character escape, so the escaped
            // character (possibly a backslash itself) is never re-read as a new escape
            // and cur can never run past the end of the pattern.
            cur = slashIndex + 2;

            switch (curChar) {
                case 'x':
                case 'u':
                case 'a':
                case 'b':
                case 'e':
                case 'f':
                case 'k': // produced by pass 1 for (?P=name) back-references
                case 'n':
                case 'r':
                case 't':
                case 'v':
                case 'c':
                case 's':
                case 'W':
                case 'w':
                case 'p':
                case 'P':
                case 'S':
                case 'd':
                case 'D':
                case 'A': // start-of-string anchor
                case 'B': // non-word-boundary assertion
                case 'Z':
                    // known escape sequences, leave escaped.
                    break;
                case '\\':
                    // escaping a \
                    break;
                default:
                    System.Globalization.UnicodeCategory charClass = Char.GetUnicodeCategory(curChar);
                    switch (charClass) {
                        // recognized word characters, always unescape.
                        case System.Globalization.UnicodeCategory.ModifierLetter:
                        case System.Globalization.UnicodeCategory.LowercaseLetter:
                        case System.Globalization.UnicodeCategory.UppercaseLetter:
                        case System.Globalization.UnicodeCategory.TitlecaseLetter:
                        case System.Globalization.UnicodeCategory.OtherLetter:
                        case System.Globalization.UnicodeCategory.LetterNumber:
                        case System.Globalization.UnicodeCategory.OtherNumber:
                        case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                            pattern = pattern.Remove(slashIndex, 1);
                            // the unescaped character now sits at slashIndex; continue after it
                            cur = slashIndex + 1;
                            break;
                        case System.Globalization.UnicodeCategory.DecimalDigitNumber:
                            // actually don't want to unescape '\1', '\2' etc. which are references to groups
                            break;
                    }
                    break;
            }
        }

        res.Pattern = pattern;
        return res;
    }

    // Source of the numeric suffix used when artificially naming unnamed groups.
    static Random r = new Random(DateTime.Now.Millisecond);

    /// <summary>
    /// Returns a random number in [Int32.MaxValue / 2, Int32.MaxValue) as decimal text.
    /// </summary>
    private static string GetRandomString() {
        return r.Next(Int32.MaxValue / 2, Int32.MaxValue).ToString(System.Globalization.CultureInfo.InvariantCulture);
    }
}