# numpy 簡單的數字識別OpenCV-Python中的OCR

## 創建樣本和標籤數據的代碼

``````//Process image to extract contour
Mat thr,gray,con;
cvtColor(src,gray,CV_BGR2GRAY);
threshold(gray,thr,200,255,THRESH_BINARY_INV); //Threshold to find contour
thr.copyTo(con);

// Create sample and label data
vector< vector <Point> > contours; // Vector for storing contour
vector< Vec4i > hierarchy;
Mat sample;
Mat response_array;
findContours( con, contours, hierarchy,CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE ); //Find contour

for( int i = 0; i< contours.size(); i=hierarchy[i][0] ) // iterate through first hierarchy level contours
{
Rect r= boundingRect(contours[i]); //Find bounding rect for each contour
rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,0,255),2,8,0);
Mat ROI = thr(r); //Crop the image
Mat tmp1, tmp2;
resize(ROI,tmp1, Size(10,10), 0,0,INTER_LINEAR ); //resize to 10X10
tmp1.convertTo(tmp2,CV_32FC1); //convert to float
sample.push_back(tmp2.reshape(1,1)); // Store  sample data
imshow("src",src);
int c=waitKey(0); // Read corresponding label for contour from keyoard
c-=0x30;     // Convert ascii to intiger value
response_array.push_back(c); // Store label to a mat
rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,255,0),2,8,0);
}

// Store the data to file
Mat response,tmp;
tmp=response_array.reshape(1,1); //make continuous
tmp.convertTo(response,CV_32FC1); // Convert  to float

FileStorage Data("TrainingData.yml",FileStorage::WRITE); // Store the sample data in a file
Data << "data" << sample;
Data.release();

FileStorage Label("LabelData.yml",FileStorage::WRITE); // Store the label data in a file
Label << "label" << response;
Label.release();
cout<<"Training and Label data created successfully....!! "<<endl;

imshow("src",src);
waitKey();
``````

## 培訓和測試代碼

``````Mat thr,gray,con;
cvtColor(src,gray,CV_BGR2GRAY);
threshold(gray,thr,200,255,THRESH_BINARY_INV); // Threshold to create input
thr.copyTo(con);

// Read stored sample and label for training
Mat sample;
Mat response,tmp;
Data["data"] >> sample;
Data.release();

Label["label"] >> response;
Label.release();

KNearest knn;
knn.train(sample,response); // Train with sample and responses
cout<<"Training compleated.....!!"<<endl;

vector< vector <Point> > contours; // Vector for storing contour
vector< Vec4i > hierarchy;

//Create input sample by contour finding and cropping
findContours( con, contours, hierarchy,CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE );
Mat dst(src.rows,src.cols,CV_8UC3,Scalar::all(0));

for( int i = 0; i< contours.size(); i=hierarchy[i][0] ) // iterate through each contour for first hierarchy level .
{
Rect r= boundingRect(contours[i]);
Mat ROI = thr(r);
Mat tmp1, tmp2;
resize(ROI,tmp1, Size(10,10), 0,0,INTER_LINEAR );
tmp1.convertTo(tmp2,CV_32FC1);
float p=knn.find_nearest(tmp2.reshape(1,1), 1);
char name[4];
sprintf(name,"%d",(int)p);
putText( dst,name,Point(r.x,r.y+r.height) ,0,1, Scalar(0, 255, 0), 2, 8 );
}

imshow("src",src);
imshow("dst",dst);
imwrite("dest.jpg",dst);
waitKey();
``````

## 結果

python opencv numpy computer-vision ocr

OpenCV示例附帶了一個樣本`letter_recog.py` 。 但我仍然無法弄清楚如何使用它。 我不明白什麼是樣本，回復等。另外，它首先加載一個txt文件，這是我首先不了解的。

``````import numpy as np
import cv2

fn = 'letter-recognition.data'
a = np.loadtxt(fn, np.float32, delimiter=',', converters={ 0 : lambda ch : ord(ch)-ord('A') })
samples, responses = a[:,1:], a[:,0]

model = cv2.KNearest()
retval = model.train(samples,responses)
retval, results, neigh_resp, dists = model.find_nearest(samples, k = 10)
print results.ravel()
``````

1）什麼是letter_recognition.data文件？ 如何從我自己的數據集中構建該文件？

2） `results.reval()`表示什麼？

3）我們如何使用letter_recognition.data文件（KNearest或SVM）編寫簡單的數字識別工具？

### Tags

python   opencv   numpy   computer-vision   ocr