/// <summary>
/// Recursively walks the directory tree rooted at <paramref name="Path"/>. Every file whose
/// lower-cased extension is accepted by <c>CheckFileType</c> is tested with <c>IsUTF8</c>;
/// files that fail the test are read once as UTF-8 and once with the system default encoding.
/// </summary>
/// <param name="Path">Directory to scan. Sub-directories are processed before the directory's own files.</param>
private void FindNoUTFFile(string Path)
{
    DirectoryInfo folder = new DirectoryInfo(Path);

    // Depth-first: descend into every sub-directory before handling this directory's files.
    foreach (DirectoryInfo subFolder in folder.GetDirectories())
    {
        FindNoUTFFile(subFolder.FullName);
    }

    foreach (FileInfo file in folder.GetFiles())
    {
        // Invariant lower-casing keeps the extension check independent of the current UI culture
        // (e.g. the Turkish dotless-i rule would otherwise turn ".INI" into ".ını").
        if (!CheckFileType(file.Extension.ToLowerInvariant()))
        {
            continue;
        }

        bool isUtf8;
        // 'using' guarantees the handle is released even when IsUTF8 throws.
        using (FileStream fs = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
        {
            isUtf8 = IsUTF8(fs);
        }

        if (isUtf8)
        {
            continue;
        }

        // NOTE(review): both decoded copies are read but never used inside this method --
        // presumably a conversion or reporting step is missing here; confirm with the author.
        string textAsUtf8;
        using (StreamReader reader = new StreamReader(file.FullName, Encoding.UTF8))
        {
            textAsUtf8 = reader.ReadToEnd();
        }

        string textAsDefault;
        // Third argument enables byte-order-mark detection, which may override Encoding.Default.
        using (StreamReader reader = new StreamReader(file.FullName, Encoding.Default, true))
        {
            textAsDefault = reader.ReadToEnd();
        }
    }
}
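// UTF-8 byte layouts by code-point range:
// 0000 0000-0000 007F - 0xxxxxxx (ASCII, encoded as 1 octet)
// 0000 0080-0000 07FF - 110xxxxx 10xxxxxx (2-octet format)
// 0000 0800-0000 FFFF - 1110xxxx 10xxxxxx 10xxxxxx (3-octet format)
// 0001 0000-0010 FFFF - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (4-octet format)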
/// <summary>
/// Checks whether the stream holds structurally valid UTF-8 that contains at least one
/// multi-byte sequence.
/// </summary>
/// <param name="sbInputStream">
/// Stream to inspect. It is read from its current position to the end and is left positioned there;
/// the caller remains responsible for closing it.
/// </param>
/// <returns>
/// <c>true</c> when every lead byte is followed by the right number of continuation bytes and at
/// least one non-ASCII byte was seen. <c>false</c> for malformed or truncated sequences, for lead
/// bytes announcing more than four octets, and for pure-ASCII (including empty) content, because
/// such content cannot be told apart from a single-byte encoding.
/// </returns>
/// <remarks>
/// Only the lead/continuation structure is verified; overlong encodings and surrogate code points
/// are not rejected.
/// </remarks>
private static bool IsUTF8(FileStream sbInputStream)
{
    int pendingContinuations = 0; // continuation bytes still expected for the current character
    bool sawNonAscii = false;
    int current;

    // Read until ReadByte signals end-of-stream (-1) instead of counting up to Length: this
    // avoids overflowing an int counter on files larger than 2 GB and never mistakes the EOF
    // sentinel for a 0xFF data byte.
    while ((current = sbInputStream.ReadByte()) != -1)
    {
        if (current >= 0x80)
        {
            sawNonAscii = true;
        }

        if (pendingContinuations > 0)
        {
            // Inside a multi-byte character: the byte must look like 10xxxxxx.
            if ((current & 0xC0) != 0x80)
            {
                return false;
            }
            pendingContinuations--;
            continue;
        }

        if (current < 0x80)
        {
            continue; // plain ASCII
        }

        if ((current & 0xE0) == 0xC0)
        {
            pendingContinuations = 1; // 110xxxxx
        }
        else if ((current & 0xF0) == 0xE0)
        {
            pendingContinuations = 2; // 1110xxxx
        }
        else if ((current & 0xF8) == 0xF0)
        {
            pendingContinuations = 3; // 11110xxx
        }
        else
        {
            // Either a stray continuation byte (10xxxxxx) or a lead byte 0xF8-0xFF, which
            // would start a 5+ octet sequence that UTF-8 does not allow.
            return false;
        }
    }

    // A character cut off at end-of-stream is malformed.
    if (pendingContinuations > 0)
    {
        return false;
    }

    // Pure ASCII is deliberately reported as "not UTF-8".
    return sawNonAscii;
}
}
}