// (Section headings left over from the source article: "Result", "Project", "Code".)
using Aspose.Cells;
using NLog;
using OpenCvSharp;
using OpenVINO.OCRService;
using Sdcb.OpenVINO;
using Sdcb.OpenVINO.PaddleOCR;
using Sdcb.OpenVINO.PaddleOCR.Models;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace OpenVINO.OCR
{
    /// <summary>
    /// Main window of the OCR screening tool. It loads the PaddleOCR models and the
    /// rule lists at startup, parses a spreadsheet of records (id, title, image URLs),
    /// downloads the images on background tasks, runs OCR on them and records every
    /// record whose recognized text matches a forbidden-word or phone-number rule.
    /// </summary>
    /// <remarks>
    /// NOTE(review): several UI/log string literals contain what look like
    /// transliteration artifacts (for example "(shù)"). They are reproduced unchanged
    /// here because they are runtime text; confirm the intended wording separately.
    /// </remarks>
    public partial class frmMain : Form
    {
        /// <summary>How long an idle worker waits before polling its queue again.</summary>
        private const int IdlePollMilliseconds = 50;

        public frmMain()
        {
            InitializeComponent();
            NLog.Windows.Forms.RichTextBoxTarget.ReInitializeAllTextboxes(this);
        }

        String startupPath;
        private string excelFileFilter = "表格|*.xlsx;*.xls;";
        private Logger log = NLog.LogManager.GetCurrentClassLogger();
        CancellationTokenSource cts;

        // Records parsed from the spreadsheet that still have to be downloaded.
        ConcurrentQueue<ImgInfo> ltImgInfo = new ConcurrentQueue<ImgInfo>();
        // Records whose images have been downloaded and are waiting for OCR.
        ConcurrentQueue<ImgInfo> matQueue = new ConcurrentQueue<ImgInfo>();

        bool saveImg = false;
        bool saveOcr = false;

        int ocrNum = 0;        // number of records whose OCR pass has finished
        int totalCount = 0;    // number of records queued for the current run
        int downloadCount = 0; // number of records whose download pass has finished
        int vioIDCount = 0;    // number of records flagged as violating a rule

        /// <summary>
        /// Form load handler: builds the OCR engine from the model files under
        /// "inference" next to the executable, tunes the HTTP connection settings and
        /// loads the two rule lists from the "rules" folder (relative to the current
        /// working directory).
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void frmMain_Load(object sender, EventArgs e)
        {
            // Initialization.
            startupPath = System.Windows.Forms.Application.StartupPath;

            string detectionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_det_infer";
            string classificationModelDir = startupPath + "\\inference\\ch_ppocr_mobile_v2.0_cls_infer";
            string recognitionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_rec_infer";
            string labelFilePath = startupPath + "\\inference\\ppocr_keys.txt";

            FullOcrModel model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);

            PaddleOcrOptions paddleOcrOptions = new PaddleOcrOptions();
            paddleOcrOptions.DetectionDeviceOptions = new DeviceOptions("CPU");
            paddleOcrOptions.DetectionStaticSize = new OpenCvSharp.Size(800, 800);
            paddleOcrOptions.RecognitionStaticWidth = 512;

            Program.paddleOcr = new PaddleOcrAll(model, paddleOcrOptions);
            Program.paddleOcr.Detector.UnclipRatio = 1.5f;
            // Allow recognition of text drawn at an angle.
            Program.paddleOcr.AllowRotateDetection = true;
            // Original note on this switch: "allow recognition of text rotated by more than 90 degrees"; it is left off.
            Program.paddleOcr.Enable180Classification = false;

            ServicePointManager.Expect100Continue = false;
            ServicePointManager.DefaultConnectionLimit = 512;

            // Load the forbidden-word rules.
            Common.ltRuleContains.Clear();
            Common.ltRuleTel.Clear();

            string ruleContainsPath = "rules\\rule_contains.txt";
            if (File.Exists(ruleContainsPath))
            {
                Common.ltRuleContains = File.ReadAllLines(ruleContainsPath).ToList();
            }

            StringBuilder sb = new StringBuilder();
            foreach (var item in Common.ltRuleContains)
            {
                sb.AppendLine(item);
            }
            log.Info("rule_contains.txt---->包含" + Common.ltRuleContains.Count() + "個違禁詞,內(nèi)容如下:\r\n" + sb.ToString());

            // Load the phone-number prefixes; they are stored lower-cased.
            string ruleTelPath = "rules\\rule_tel.txt";
            if (File.Exists(ruleTelPath))
            {
                foreach (var item in File.ReadAllLines(ruleTelPath))
                {
                    Common.ltRuleTel.Add(item.ToLower());
                }
            }

            sb.Clear();
            foreach (var item in Common.ltRuleTel)
            {
                sb.AppendLine(item);
            }
            log.Info("rule_tel.txt---->包含" + Common.ltRuleTel.Count() + "個號碼前綴,內(nèi)容如下:\r\n" + sb.ToString());
        }

        /// <summary>
        /// "Choose spreadsheet" handler: lets the user pick an Excel file and turns
        /// every data row (the first row is skipped as a header) into an
        /// <c>ImgInfo</c> queued in <c>ltImgInfo</c>.
        /// </summary>
        /// <remarks>
        /// Column 0 is the id, column 1 the title. Columns 2..6 are taken as a single
        /// image URL each; from column 7 on the cell text is passed to
        /// <c>Common.GetScrUrl</c>, which may yield several URLs.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            try
            {
                string excelPath;
                using (OpenFileDialog ofd = new OpenFileDialog())
                {
                    ofd.Filter = excelFileFilter;
                    if (ofd.ShowDialog() != DialogResult.OK)
                    {
                        return;
                    }
                    excelPath = ofd.FileName;
                }

                log.Info("解析中……");
                Application.DoEvents();

                Stopwatch sw = Stopwatch.StartNew();

                ltImgInfo = new ConcurrentQueue<ImgInfo>();
                int imgCount = 0;

                using (Workbook workbook = new Workbook(excelPath))
                {
                    Cells cells = workbook.Worksheets[0].Cells;
                    int columnCount = cells.MaxColumn + 1;

                    // Export everything below the header row.
                    System.Data.DataTable dataTable1 = cells.ExportDataTable(1, 0, cells.MaxDataRow, columnCount);

                    foreach (DataRow row in dataTable1.Rows)
                    {
                        ImgInfo temp = new ImgInfo();
                        temp.id = row[0].ToString();
                        temp.title = row[1].ToString();

                        List<MatInfo> list = new List<MatInfo>();
                        for (int i = 2; i < columnCount; i++)
                        {
                            string tempStr = row[i].ToString();
                            if (string.IsNullOrEmpty(tempStr))
                            {
                                continue;
                            }

                            if (i >= 7)
                            {
                                // These cells are handed to Common.GetScrUrl to extract the URLs.
                                foreach (var item in Common.GetScrUrl(tempStr))
                                {
                                    MatInfo matInfo = new MatInfo();
                                    matInfo.url = item;
                                    list.Add(matInfo);
                                }
                            }
                            else
                            {
                                MatInfo matInfo = new MatInfo();
                                matInfo.url = tempStr;
                                list.Add(matInfo);
                            }
                        }

                        temp.images = list;
                        imgCount = imgCount + list.Count;
                        ltImgInfo.Enqueue(temp);
                    }
                }

                log.Info("解析完畢,一共[" + ltImgInfo.Count + "]條記錄,[" + imgCount + "]張圖片,耗時:" + sw.ElapsedMilliseconds + "毫秒");
            }
            catch (Exception ex)
            {
                log.Error("解析表格異常:" + ex.Message);
                MessageBox.Show("解析表格異常:" + ex.Message);
            }
        }

        /// <summary>
        /// Updates the progress text box on the UI thread.
        /// </summary>
        /// <param name="total">Total number of records in the run.</param>
        /// <param name="ocrNum">Number of records already recognized.</param>
        /// <param name="downloadCount">Number of records already downloaded.</param>
        /// <param name="time">Elapsed time of the run in milliseconds.</param>
        /// <param name="vioIDCount">Number of records flagged so far.</param>
        void ShowCostTime(string total, string ocrNum, string downloadCount, long time, int vioIDCount)
        {
            // Workers can still report while the window is closing; Invoke on a
            // control without a handle would throw on the worker thread.
            if (txtTotal.IsDisposed || !txtTotal.IsHandleCreated)
            {
                return;
            }

            txtTotal.Invoke(new Action(() =>
            {
                TimeSpan ts = TimeSpan.FromMilliseconds(time);
                txtTotal.Text = string.Format("下載完成:{0}/{1},識別完成:{2}/{3},違規(guī)ID數(shù)量:{5},用時:{4}"
                    , downloadCount
                    , total
                    , ocrNum
                    , total
                    , ts.ToString()
                    , vioIDCount
                    );
            }));
        }

        /// <summary>
        /// "Download and recognize" handler: after confirmation it clears the previous
        /// results, resets the counters and starts two download tasks plus one OCR
        /// task that run until all queued records are processed or the run is stopped.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            if (ltImgInfo.Count == 0)
            {
                MessageBox.Show("請先選擇表格!");
                return;
            }

            DialogResult result = MessageBox.Show("確認(rèn)開始下載識別?此操作會清空上一次的數(shù)據(jù),請注意備份!", "確認(rèn)", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
            if (result == DialogResult.Yes)
            {
                log.Info("確認(rèn)開始下載識別!");
            }
            else
            {
                log.Info("取消開始下載識別!");
                return;
            }

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory("img");
            Directory.CreateDirectory("ocr_result");
            Directory.CreateDirectory("result");
            Directory.CreateDirectory("result//img");

            // Remove every file left from the previous run, then start a fresh
            // result file with its column header.
            foreach (string filePath in Directory.GetFiles("result", "*", SearchOption.AllDirectories))
            {
                File.Delete(filePath);
            }
            File.WriteAllText("result//result.txt", "id\ttitel\tcontent\r\n");

            btnStop.Enabled = true;
            btnStart.Enabled = false;
            chkSaveImg.Enabled = false;
            chkSaveOcr.Enabled = false;

            saveImg = chkSaveImg.Checked;
            saveOcr = chkSaveOcr.Checked;

            Application.DoEvents();

            // Capture the token and both queues in locals so that workers of this run
            // never observe the token or queues of a later run.
            cts = new CancellationTokenSource();
            CancellationToken token = cts.Token;
            ConcurrentQueue<ImgInfo> pending = ltImgInfo;
            ConcurrentQueue<ImgInfo> downloaded = new ConcurrentQueue<ImgInfo>();
            matQueue = downloaded;

            Stopwatch total = Stopwatch.StartNew();

            ocrNum = 0;
            totalCount = pending.Count;
            downloadCount = 0;
            vioIDCount = 0; // was never reset, so the violation count leaked across runs

            // Download workers.
            const int downloadThreadNum = 2;
            for (int i = 0; i < downloadThreadNum; i++)
            {
                Task.Factory.StartNew(() => DownloadLoop(token, pending, downloaded), TaskCreationOptions.LongRunning);
            }

            // OCR worker.
            Task.Factory.StartNew(() => RecognizeLoop(token, downloaded, total), TaskCreationOptions.LongRunning);
        }

        /// <summary>
        /// Body of one download worker: takes records from <paramref name="pending"/>,
        /// downloads all of their images and hands the record to
        /// <paramref name="downloaded"/>. Returns when the run is cancelled or every
        /// record of the run has been downloaded.
        /// </summary>
        private void DownloadLoop(CancellationToken token, ConcurrentQueue<ImgInfo> pending, ConcurrentQueue<ImgInfo> downloaded)
        {
            while (true)
            {
                if (token.IsCancellationRequested)
                {
                    return;
                }

                if (Volatile.Read(ref downloadCount) == totalCount)
                {
                    log.Info("--------------------------------->下載完成!<----------------------------------");
                    return;
                }

                ImgInfo imgInfo;
                if (!pending.TryDequeue(out imgInfo))
                {
                    // Another worker is finishing the last records; wait instead of spinning.
                    Thread.Sleep(IdlePollMilliseconds);
                    continue;
                }

                // Throttle when the OCR worker is falling behind.
                if (downloaded.Count > 50)
                {
                    System.Threading.Thread.Sleep(1000);
                }
                if (downloaded.Count > 100)
                {
                    System.Threading.Thread.Sleep(2000);
                }

                int imagesCount = imgInfo.images.Count;
                for (int j = 0; j < imagesCount; j++)
                {
                    try
                    {
                        DownloadImage(imgInfo, j);
                    }
                    catch (Exception ex)
                    {
                        // A failed image leaves its mat unset; the record is still forwarded.
                        log.Error("---->id:" + imgInfo.id + ",url[" + imgInfo.images[j].url + "],下載異常:" + ex.Message);
                    }
                }

                downloaded.Enqueue(imgInfo);
                Interlocked.Increment(ref downloadCount);
            }
        }

        /// <summary>
        /// Downloads image <paramref name="j"/> of <paramref name="imgInfo"/>, converts
        /// it to a BGR <c>Mat</c> and stores the mat and its "id_index" name on the
        /// image entry. Throws on any network or decoding failure.
        /// </summary>
        private void DownloadImage(ImgInfo imgInfo, int j)
        {
            MatInfo image = imgInfo.images[j];
            Stopwatch sw = Stopwatch.StartNew();

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(image.url);
            request.KeepAlive = false;
            request.ServicePoint.Expect100Continue = false;
            request.Timeout = 2000;          // 2 seconds
            request.ReadWriteTimeout = 2000; // 2 seconds

            request.ServicePoint.UseNagleAlgorithm = false;
            request.ServicePoint.ConnectionLimit = 65500;
            request.AllowWriteStreamBuffering = false;
            request.Proxy = null;

            request.CookieContainer = new CookieContainer();
            request.CookieContainer.Add(new Cookie("AspxAutoDetectCookieSupport", "1") { Domain = new Uri(image.url).Host });

            // Image.FromStream needs its stream to stay open for the lifetime of the
            // image, so the response body is buffered in memory first.
            using (MemoryStream buffer = new MemoryStream())
            {
                using (HttpWebResponse wresp = (HttpWebResponse)request.GetResponse())
                using (Stream s = wresp.GetResponseStream())
                {
                    s.CopyTo(buffer);
                }
                buffer.Position = 0;

                using (Bitmap bmp = (Bitmap)System.Drawing.Image.FromStream(buffer))
                {
                    sw.Stop();

                    image.name = imgInfo.id + "_" + j;

                    if (saveImg)
                    {
                        // Force JPEG so the content matches the .jpg extension.
                        bmp.Save("img//" + image.name + ".jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
                    }

                    // Assign before converting so the mat is released by the OCR
                    // worker even if the colour conversion fails.
                    image.mat = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp);
                    if (image.mat.Channels() == 4)
                    {
                        Cv2.CvtColor(image.mat, image.mat, ColorConversionCodes.BGRA2BGR);
                    }
                }
            }

            log.Info(" " + image.name + "-->下載用時:" + sw.ElapsedMilliseconds + "毫秒");
        }

        /// <summary>
        /// Body of the OCR worker: takes downloaded records from
        /// <paramref name="downloaded"/>, checks their images one by one until the
        /// first rule violation, and reports progress. Returns when the run is
        /// cancelled or every record of the run has been processed.
        /// </summary>
        private void RecognizeLoop(CancellationToken token, ConcurrentQueue<ImgInfo> downloaded, Stopwatch total)
        {
            while (true)
            {
                if (token.IsCancellationRequested)
                {
                    return;
                }

                if (Volatile.Read(ref ocrNum) == totalCount)
                {
                    log.Info("--------------------------------->識別完成!<----------------------------------");
                    return;
                }

                ImgInfo imgInfo;
                if (!downloaded.TryDequeue(out imgInfo))
                {
                    // Nothing downloaded yet; wait instead of spinning.
                    Thread.Sleep(IdlePollMilliseconds);
                    continue;
                }

                Stopwatch perID = Stopwatch.StartNew();
                int imagesCount = imgInfo.images.Count;
                try
                {
                    for (int j = 0; j < imagesCount; j++)
                    {
                        // One violation is enough to flag the record.
                        if (RecognizeImage(imgInfo, j))
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    // Release every mat of the record, including the ones that were
                    // skipped because an earlier image already violated a rule.
                    foreach (MatInfo image in imgInfo.images)
                    {
                        if (image.mat != null)
                        {
                            image.mat.Dispose();
                            image.mat = null;
                        }
                    }
                }

                perID.Stop();
                log.Info("---->id:" + imgInfo.id + ",圖片張數(shù)[" + imagesCount + "],識別小計用時:" + perID.ElapsedMilliseconds + "毫秒");
                Interlocked.Increment(ref ocrNum);
                ShowCostTime(totalCount.ToString(), ocrNum.ToString(), downloadCount.ToString(), total.ElapsedMilliseconds, vioIDCount);
            }
        }

        /// <summary>
        /// Runs OCR on image <paramref name="j"/> of <paramref name="imgInfo"/> and
        /// applies the forbidden-word rule, then the phone-number rule.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a rule matched and the violation was recorded;
        /// <c>false</c> when the image is missing, clean, or processing failed.
        /// </returns>
        private bool RecognizeImage(ImgInfo imgInfo, int j)
        {
            MatInfo image = imgInfo.images[j];
            try
            {
                if (image.mat == null || image.mat.Empty())
                {
                    return false;
                }

                Stopwatch sw = Stopwatch.StartNew();
                PaddleOcrResult ocrResult = Program.paddleOcr.Run(image.mat);
                sw.Stop();
                log.Info(" " + image.name + "---->識別用時:" + sw.ElapsedMilliseconds + "毫秒");

                // Keep confident regions only, in reading order (top to bottom, left to right).
                string ocrInfo = string.Join("\n", from x in ocrResult.Regions
                                                   where x.Score > 0.8
                                                   orderby x.Rect.Center.Y, x.Rect.Center.X
                                                   select x.Text);

                if (saveOcr)
                {
                    File.WriteAllText("ocr_result//" + image.name + ".txt", ocrInfo);
                }

                // Rule check on the text with surrounding whitespace and inner spaces removed.
                ocrInfo = ocrInfo.Trim();
                ocrInfo = ocrInfo.Replace(" ", "");

                string words;
                if (Common.RuleContainsCheck(ocrInfo, out words, ocrResult))
                {
                    ReportViolation(imgInfo, image, "包含違禁詞:", words, ocrInfo);
                    return true;
                }

                if (Common.RuleTelCheck(ocrInfo, out words, ocrResult))
                {
                    ReportViolation(imgInfo, image, "疑似包含電話號碼:", words, ocrInfo);
                    return true;
                }

                return false;
            }
            catch (Exception ex)
            {
                log.Error(" " + image.name + "---->識別異常:" + ex.Message);
                return false;
            }
        }

        /// <summary>
        /// Records one violation: logs it, appends it to the summary and detail result
        /// files, saves the offending image under "result//img" and bumps the
        /// violation counter.
        /// </summary>
        private void ReportViolation(ImgInfo imgInfo, MatInfo image, string label, string words, string ocrInfo)
        {
            string resultInfo = string.Format("ID:{0},Title:[{1}],------>{2}{3}", imgInfo.id, imgInfo.title, label, words);
            log.Info(resultInfo);

            // Persist the finding.
            File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t" + label + words + "\r\n");
            File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",識別內(nèi)容" + ocrInfo + "\r\n<--------\r\n");

            // Persist the image.
            Cv2.ImWrite("result//img//" + image.name + ".jpg", image.mat);

            Interlocked.Increment(ref vioIDCount);
        }

        /// <summary>
        /// "Stop" handler: requests cancellation of the running workers and re-enables
        /// the start button and the option check boxes.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            // cts is only created once a run has been started.
            if (cts != null)
            {
                cts.Cancel();
            }

            btnStop.Enabled = false;
            btnStart.Enabled = true;

            chkSaveImg.Enabled = true;
            chkSaveOcr.Enabled = true;
        }
    }
}
|