程式師世界 >> 編程語言 >> .NET網頁編程 >> C# >> C#入門知識 >> c#中使用python語法的正則表達式.

c#中使用python語法的正則表達式.

編輯：C#入門知識

c#中使用python語法的正則表達式.

因為項目需求,現在需要在c#中使用python的正則,最開始采用的方法是ironpython.

但是在ios上面編譯無法通過.好在ironpython是一個開源項目,於是拿到代碼開始研究.

因為我需要的功能很簡單,所以最後的代碼也只是判斷傳入的字符串是否匹配傳入的python正則表達式.

解析部分代碼直接使用:

/* ****************************************************************************
 *
 * Copyright (c) Microsoft Corporation. 
 *
 * This source code is subject to terms and conditions of the Microsoft Public
 * License. A  copy of the license can be found in the License.html file at the
 * root of this distribution. If  you cannot locate the  Microsoft Public
 * License, please send an email to  [email protected]. By using this source
 * code in any fashion, you are agreeing to be bound by the terms of the 
 * Microsoft Public License.
 *
 * You must not remove this notice, or any other, from this software.
 *
 * ***************************************************************************/

using System;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Matches strings against regular expressions written in Python syntax by
/// first rewriting the Python-only constructs into their .NET equivalents.
/// The pre-parsing logic is adapted from IronPython's re module.
/// By ZeaLotSean
/// </summary>
public static class PythonRegex
{
    #region CONSTANTS

    // short forms
    //public static object I = 0x02;
    //public static object L = 0x04;
    //public static object M = 0x08;
    //public static object S = 0x10;
    //public static object U = 0x20;
    //public static object X = 0x40;

    // long forms (boxed ints; combine with | after casting to int)
    public static object IGNORECASE = 0x02;
    public static object LOCALE = 0x04;
    public static object MULTILINE = 0x08;
    public static object DOTALL = 0x10;
    // UNICODE is accepted as a flag value but FlagsToOption maps it to nothing.
    public static object UNICODE = 0x20;
    public static object VERBOSE = 0x40;

    #endregion

    /// <summary>
    /// Compiles a Python-syntax regular expression with no flags, requesting
    /// <see cref="RegexOptions.Compiled"/> for the underlying .NET regex.
    /// </summary>
    /// <param name="_pattern">The regular expression in Python syntax.</param>
    /// <returns>A reusable compiled pattern.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="_pattern"/> is null.</exception>
    /// <exception cref="ArgumentException">The pattern is malformed or uses an unsupported construct.</exception>
    public static Python_Pattern Compile(string _pattern)
    {
        return new Python_Pattern(_pattern, 0, true);
    }

    /// <summary>
    /// Tests whether <paramref name="_str"/> contains a match for the
    /// Python-syntax pattern, using no flags.
    /// </summary>
    /// <param name="_pattern">The regular expression in Python syntax.</param>
    /// <param name="_str">The text to search.</param>
    /// <returns>true if a match is found anywhere in the text.</returns>
    public static bool isMatched(string _pattern, string _str)
    {
        return isMatched(_pattern, _str, 0);
    }

    /// <summary>
    /// Tests whether <paramref name="_str"/> contains a match for the
    /// Python-syntax pattern under the given flags.
    /// </summary>
    /// <param name="_pattern">The regular expression in Python syntax.</param>
    /// <param name="_str">The text to search.</param>
    /// <param name="flags">Bitwise combination of the flag constants (IGNORECASE, MULTILINE, ...).</param>
    /// <returns>true if a match is found anywhere in the text.</returns>
    public static bool isMatched(string _pattern, string _str, int flags)
    {
        return new Python_Pattern(_pattern, flags).isMatched(_str);
    }

    /// <summary>
    /// Compiled reg-ex pattern
    /// </summary>
    public class Python_Pattern
    {
        // The .NET regex built from the translated pattern.
        internal Regex m_re;

        // The pre-parse result (original and translated pattern text).
        internal ParsedRegex m_pre;

        /// <summary>Creates a pattern with no flags, not compiled.</summary>
        /// <param name="pattern">The regular expression in Python syntax.</param>
        public Python_Pattern(string pattern)
            : this(pattern, 0)
        {
        }

        /// <summary>Creates a pattern with the given flags, not compiled.</summary>
        /// <param name="pattern">The regular expression in Python syntax.</param>
        /// <param name="flags">Bitwise combination of the flag constants.</param>
        public Python_Pattern(string pattern, int flags)
            : this(pattern, flags, false)
        {
        }

        /// <summary>
        /// Translates the Python pattern to .NET syntax and builds the regex.
        /// </summary>
        /// <param name="pattern">The regular expression in Python syntax.</param>
        /// <param name="flags">Bitwise combination of the flag constants.</param>
        /// <param name="compiled">When true, <see cref="RegexOptions.Compiled"/> is added.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
        /// <exception cref="ArgumentException">The pattern is malformed or uses an unsupported construct.</exception>
        public Python_Pattern(string pattern, int flags, bool compiled)
        {
            if (pattern == null)
            {
                throw new ArgumentNullException("pattern");
            }

            m_pre = PreParseRegex(pattern);

            // Inline option groups such as (?i) stay in the translated pattern
            // text, so only the explicit flags are converted here.
            RegexOptions opts = FlagsToOption(flags);
            if (compiled)
            {
                opts |= RegexOptions.Compiled;
            }

            this.m_re = new Regex(m_pre.Pattern, opts);
        }

        /// <summary>
        /// Searches the given string for a match of this pattern.
        /// </summary>
        /// <param name="_str">The text to search.</param>
        /// <returns>true if a match is found anywhere in the text.</returns>
        public bool isMatched(string _str)
        {
            return m_re.Match(_str).Success;
        }

        /// <summary>The pattern text exactly as the caller supplied it.</summary>
        public string Pattern
        {
            get
            {
                return m_pre.UserPattern;
            }
        }
    }

    /// <summary>
    /// Converts the Python-style flag bits into <see cref="RegexOptions"/>.
    /// </summary>
    private static RegexOptions FlagsToOption(int flags)
    {
        RegexOptions opts = RegexOptions.None;
        if ((flags & (int)IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
        if ((flags & (int)MULTILINE) != 0) opts |= RegexOptions.Multiline;
        // opts never contains CultureInvariant at this point, so this clears
        // a bit that is already clear; kept as in the original flag handling.
        if (((flags & (int)LOCALE)) == 0) opts &= (~RegexOptions.CultureInvariant);
        if ((flags & (int)DOTALL) != 0) opts |= RegexOptions.Singleline;
        if ((flags & (int)VERBOSE) != 0) opts |= RegexOptions.IgnorePatternWhitespace;

        return opts;
    }

    /// <summary>
    /// Result of pre-parsing: the caller's pattern, the translated pattern and
    /// the inline options seen while parsing.
    /// </summary>
    internal class ParsedRegex
    {
        public ParsedRegex(string pattern)
        {
            this.UserPattern = pattern;
        }

        // Pattern text as supplied by the caller.
        public string UserPattern;
        // Pattern text after translation to .NET syntax.
        public string Pattern;
        // Inline options recorded by PreParseRegex; not read elsewhere in this class.
        public RegexOptions Options = RegexOptions.CultureInvariant;
    }

    /// <summary>
    /// Builds the exception raised for patterns that cannot be translated.
    /// </summary>
    private static ArgumentException CreatePatternError(string userPattern, string reason)
    {
        return new ArgumentException(reason + ": " + userPattern, "pattern");
    }

    /// <summary>
    /// Returns true for escape letters that must keep their backslash because
    /// they are meaningful escapes in the translated pattern.
    /// </summary>
    private static bool IsKnownEscape(char c)
    {
        switch (c)
        {
            case 'x':
            case 'u':
            case 'a':
            case 'b':
            case 'e':
            case 'f':
            case 'k': // \k<name> is emitted by PreParseRegex for (?P=name)
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case 'c':
            case 's':
            case 'W':
            case 'w':
            case 'p':
            case 'P':
            case 'S':
            case 'd':
            case 'D':
            case 'A': // start-of-string anchor
            case 'Z':
            case 'B': // non-word-boundary anchor
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// Preparses a regular expression text returning a ParsedRegex class
    /// that can be used for further regular expressions.
    /// Takes a Python regular expression and returns it in a form usable from C#.
    /// </summary>
    /// <param name="pattern">The regular expression in Python syntax (not null).</param>
    /// <returns>The parse result; never null.</returns>
    /// <exception cref="ArgumentException">
    /// The pattern ends inside an extension group or uses an extension this
    /// translator does not support (for example the conditional group "(?(").
    /// </exception>
    private static ParsedRegex PreParseRegex(string pattern)
    {
        ParsedRegex res = new ParsedRegex(pattern);

        int cur = 0, nameIndex;
        bool containsNamedGroup = false;

        // Pass 1: walk every unescaped '(' and rewrite Python-only group syntax.
        for (; ; )
        {
            nameIndex = pattern.IndexOf('(', cur);
            if (nameIndex > 0 && pattern[nameIndex - 1] == '\\')
            {
                int curIndex = nameIndex - 2;
                int backslashCount = 1;
                while (curIndex >= 0 && pattern[curIndex] == '\\')
                {
                    backslashCount++;
                    curIndex--;
                }
                // odd number of back slashes, this is an escaped
                // paren that we should ignore.
                if ((backslashCount & 0x01) != 0)
                {
                    cur = nameIndex + 1;
                    continue;
                }
            }

            if (nameIndex == -1) break;
            if (nameIndex == pattern.Length - 1) break;

            switch (pattern[++nameIndex])
            {
                case '?':
                    // extension syntax
                    if (nameIndex == pattern.Length - 1)
                    {
                        throw CreatePatternError(res.UserPattern, "unexpected end of regex");
                    }

                    switch (pattern[++nameIndex])
                    {
                        case 'P':
                            //  named regex, .NET doesn't expect the P so we'll remove it;
                            //  also, once we see a named group i.e. ?P then we need to start artificially
                            //  naming all unnamed groups from then on---this is to get around the fact that
                            //  the CLR RegEx support orders all the unnamed groups before all the named
                            //  groups, even if the named groups are before the unnamed ones in the pattern;
                            //  the artificial naming preserves the order of the groups and thus the order of
                            //  the matches
                            if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=')
                            {
                                // match whatever was previously matched by the named group

                                // replace the "(?P=" prefix with "\k<"
                                pattern = pattern.Remove(nameIndex - 2, 4);
                                pattern = pattern.Insert(nameIndex - 2, "\\k<");
                                int tmpIndex = nameIndex;
                                while (tmpIndex < pattern.Length && pattern[tmpIndex] != ')')
                                    tmpIndex++;

                                if (tmpIndex == pattern.Length)
                                {
                                    throw CreatePatternError(res.UserPattern, "unexpected end of regex");
                                }

                                // the closing ')' becomes the closing '>' of \k<name>
                                pattern = pattern.Substring(0, tmpIndex) + ">" + pattern.Substring(tmpIndex + 1);
                            }
                            else
                            {
                                containsNamedGroup = true;
                                pattern = pattern.Remove(nameIndex, 1);
                            }
                            break;
                        case 'i': res.Options |= RegexOptions.IgnoreCase; break;
                        case 'L': res.Options &= ~(RegexOptions.CultureInvariant); break;
                        case 'm': res.Options |= RegexOptions.Multiline; break;
                        case 's': res.Options |= RegexOptions.Singleline; break;
                        case 'u': break;
                        case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
                        case ':': break; // non-capturing
                        case '=': break; // look ahead assertion
                        case '<': break; // positive look behind assertion
                        case '!': break; // negative look ahead assertion
                        case '#': break; // inline comment
                        case '(':  // yes/no if group exists, we don't support this
                        default:
                            throw CreatePatternError(res.UserPattern, "unsupported group extension");
                    }
                    break;
                default:
                    // just another group
                    if (containsNamedGroup)
                    {
                        // need to name this unnamed group
                        pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
                    }
                    break;
            }

            cur = nameIndex;
        }

        // Pass 2: drop the backslash from escapes of ordinary word characters,
        // leaving recognised escapes and group references alone.
        cur = 0;
        for (; ; )
        {
            nameIndex = pattern.IndexOf('\\', cur);

            if (nameIndex == -1 || nameIndex == pattern.Length - 1) break;
            char curChar = pattern[nameIndex + 1];

            if (curChar == '\\' || IsKnownEscape(curChar))
            {
                // An escaped backslash or a known escape sequence: leave it
                // escaped and resume after BOTH characters, so the second
                // backslash of a pair is never read as a new escape.
                cur = nameIndex + 2;
                continue;
            }

            switch (Char.GetUnicodeCategory(curChar))
            {
                // recognized word characters, always unescape.
                case System.Globalization.UnicodeCategory.ModifierLetter:
                case System.Globalization.UnicodeCategory.LowercaseLetter:
                case System.Globalization.UnicodeCategory.UppercaseLetter:
                case System.Globalization.UnicodeCategory.TitlecaseLetter:
                case System.Globalization.UnicodeCategory.OtherLetter:
                case System.Globalization.UnicodeCategory.LetterNumber:
                case System.Globalization.UnicodeCategory.OtherNumber:
                case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                    pattern = pattern.Remove(nameIndex, 1);
                    // the unescaped character now sits at nameIndex
                    cur = nameIndex + 1;
                    break;
                default:
                    // Decimal digits are left alone: we don't want to unescape
                    // '\1', '\2' etc. which are references to groups. Every
                    // other category (punctuation, symbols, ...) also stays escaped.
                    cur = nameIndex + 2;
                    break;
            }
        }

        res.Pattern = pattern;
        return res;
    }

    // Source of the numeric suffix used when naming unnamed groups.
    private static readonly Random r = new Random(DateTime.Now.Millisecond);

    /// <summary>
    /// Returns a random number in the upper half of the positive int range,
    /// formatted as a string.
    /// </summary>
    private static string GetRandomString()
    {
        return r.Next(Int32.MaxValue / 2, Int32.MaxValue).ToString();
    }
}