在我的 C++ 程序中需要调用 Python 做机器学习方面的工作。我的配置是 Win7 x64、Anaconda3 5.1(Python 3.6.4),另外安装了 XGBoost、LightGBM、TensorFlow、Keras 包,C++ 程序用 VS2010 编译。
为什么当 C++ 是控制台程序的时候可以正常调用 Python,而换成带界面的 MFC 程序就不行了呢?MFC 程序运行到 PyObject * pyModule = PyImport_ImportModule( szPythonFilename ); 这句的时候总是返回空指针。
这个问题困扰了我好几个星期了,其间尝试过升级 Python、换电脑、把 Win7 换成 Win10 都不行。希望各位大佬给指点指点,问题出在了哪里?
Python 程序 Predict.py 的代码如下:
'''python
import xgboost
#import lightgbm
import sklearn
import multiprocessing
import numpy
import keras
import os
#程序用到的所有机器学习模型的名字
def GetAllModels():
    """Return the names of all machine-learning models the program uses.

    A fresh list is built on every call, so the caller may modify the
    result without affecting later calls.
    """
    return [
        "LinearRegression", "LogisticRegression",
        "XGBRegressor", "XGBClassifier",
        "RandomForestRegressor", "RandomForestClassifier",
        "LGBMRegressor", "LGBMClassifier",
        "NetworkRegressor", "NetworkClassifier",
    ]
#特征选择
def SelectFeatures( AllFeatures, SelectedFeatureTags ):
    """Return the columns of AllFeatures whose tag equals 1.

    AllFeatures is indexed as AllFeatures[ :, columns ], so it has to be a
    two-dimensional array that supports index lists (e.g. a numpy array).
    SelectedFeatureTags holds one tag per column; column i is kept when
    SelectedFeatureTags[ i ] == 1. The original column order is preserved.
    """
    SelectedFeatureIndice = [ Index for Index, Tag in enumerate( SelectedFeatureTags ) if Tag == 1 ]
    return AllFeatures[ :, SelectedFeatureIndice ]
#模型的一些属性,如是分类器还是回归、是否有宏参等
def GetModelOptions( ModelName ):
    """Return the handling flags for the model called ModelName.

    The name is compared case-insensitively.

    Returns the tuple
    ( IsClassifier, ParellelNum, CanUsePP, HaveMacroParams, IsNetwork ):

    IsClassifier    -- True when the name contains "classifier" or is
                       "logisticregression", "svc" or "gaussiannb".
    ParellelNum     -- multiprocessing.cpu_count() for "svc" and "svr",
                       otherwise 1.
    CanUsePP        -- False when the name contains "network" or "lgbm".
    HaveMacroParams -- False for "linearregression" and "gaussiannb".
    IsNetwork       -- True when the name contains "network".
    """
    Name = ModelName.lower()
    IsNetwork = "network" in Name
    IsClassifier = "classifier" in Name or Name in ( "logisticregression", "svc", "gaussiannb" )
    # The CPU count is only queried for the models that actually use it.
    ParellelNum = multiprocessing.cpu_count() if Name in ( "svc", "svr" ) else 1
    CanUsePP = not ( IsNetwork or "lgbm" in Name )
    HaveMacroParams = Name not in ( "linearregression", "gaussiannb" )
    return IsClassifier, ParellelNum, CanUsePP, HaveMacroParams, IsNetwork
#读取、预测各个模型并进行模型融合
def PredictOneLayer( Features, PathAndBasicName, LayerName, Models ):
    """Preprocess Features for one layer and predict with every model in Models.

    Every file is looked up as PathAndBasicName + "." + LayerName + suffix:

    * ".MeanImputer.pkl" and ".Scaler.pkl" are required and applied in that
      order; ".PCA.pkl" is applied afterwards only when the file exists.
    * Each model needs "." + ModelName + ".FeatureTags.npy" (network models
      use the LinearRegression tag file instead) and its model file:
      ".h5" for network models, ".model" when the name contains "xgb",
      ".pkl" otherwise.

    Returns ( LayerPredicts, "OK" ) on success, where LayerPredicts is the
    transposed array of the per-model prediction vectors, i.e. one column per
    model. Returns ( None, message ) as soon as a required file is missing or
    the number of feature tags does not match the number of feature columns.
    """
    Prefix = PathAndBasicName + "." + LayerName

    # Mandatory preprocessing steps, applied in this order.
    for Suffix in ( ".MeanImputer.pkl", ".Scaler.pkl" ):
        TransformerFile = Prefix + Suffix
        if not os.path.exists( TransformerFile ):
            return None, "Error! %s not exists."%TransformerFile
        Features = sklearn.externals.joblib.load( TransformerFile ).transform( Features )

    # PCA is optional: a missing file simply skips the step.
    PcaFile = Prefix + ".PCA.pkl"
    if os.path.exists( PcaFile ):
        Features = sklearn.externals.joblib.load( PcaFile ).transform( Features )

    LayerPredicts = []
    for ModelName in Models:
        IsClassifier, _, _, _, IsNetwork = GetModelOptions( ModelName )

        # Network models have no tag file of their own and reuse the
        # LinearRegression one.
        TagOwner = "LinearRegression" if IsNetwork else ModelName
        FeatureTagFile = Prefix + "." + TagOwner + ".FeatureTags.npy"
        if not os.path.exists( FeatureTagFile ):
            return None, "Error! %s not exists."%FeatureTagFile
        FeatureTags = numpy.load( FeatureTagFile )
        if Features.shape[1] != len( FeatureTags ):
            return None, "FeatureTags not match Feature shape"
        SelectedFeatures = SelectFeatures( Features, FeatureTags )

        # The model kind decides both the file extension and the loader.
        IsXgb = "xgb" in ModelName.lower()
        if IsNetwork:
            Extension = ".h5"
        elif IsXgb:
            Extension = ".model"
        else:
            Extension = ".pkl"
        ModelFile = Prefix + "." + ModelName + Extension
        if not os.path.exists( ModelFile ):
            return None, "Error! %s not exists."%ModelFile

        if IsNetwork:
            RawPredicts = keras.models.load_model( ModelFile ).predict( SelectedFeatures )
            ModelPredicts = RawPredicts.flatten()
        elif IsXgb:
            Booster = xgboost.Booster( model_file = ModelFile )
            ModelPredicts = Booster.predict( xgboost.DMatrix( SelectedFeatures ) )
        else:
            Model = sklearn.externals.joblib.load( ModelFile )
            if IsClassifier:
                # Classifiers contribute the second column of predict_proba.
                ModelPredicts = Model.predict_proba( SelectedFeatures )[ :, 1 ]
            else:
                ModelPredicts = Model.predict( SelectedFeatures )
        LayerPredicts.append( ModelPredicts )

    return numpy.transpose( numpy.array( LayerPredicts ) ), "OK"
#主函数,对样本进行最终预测
def FinalPredict( Features, PathAndBasicName, FinalModel ):
    """Main entry point: run the three prediction layers and return the result.

    Layers "L1" and "L2" are predicted with every model from GetAllModels(),
    each layer consuming the output of the previous one. Layer "L3" is
    predicted with FinalModel only.

    Returns ( Predicts, "OK" ) where Predicts is a plain Python list, or
    ( None, message ) with the message of the first layer that failed.
    """
    Models = GetAllModels()
    LayerInput = Features
    for LayerName in ( "L1", "L2" ):
        LayerInput, Comment = PredictOneLayer( LayerInput, PathAndBasicName, LayerName, Models )
        if LayerInput is None:
            return None, Comment

    FinalPredicts, Comment = PredictOneLayer( LayerInput, PathAndBasicName, "L3", [ FinalModel ] )
    if FinalPredicts is None:
        return None, Comment

    # The last layer has a single model, hence a single column: transpose it
    # back and take that one row as the list of predictions.
    Predicts = numpy.transpose( numpy.array( FinalPredicts ) )[0]
    return Predicts.tolist(), "OK"
'''
C++的主要代码如下:
'''python
//构造函数,调用 python 的初始化工作
CPython::CPython()
{
    srand( 0 );
    Py_Initialize();
    InitNumpy();

    // Make the directory of the running executable the working directory and
    // add it to sys.path, so that a Predict.py placed next to the executable
    // can be imported. (The previous GetCurrentDirectory() call was removed:
    // its result was overwritten by GetModuleFileName() right away.)
    char szWorkPath[ MAX_PATH ] = "";
    GetModuleFileName( NULL, szWorkPath, sizeof(szWorkPath) );
    char * p = szWorkPath + strlen( szWorkPath );
    while( *p != '\\' && *p != ':' && p > szWorkPath ) p--;    // strip the file name
    *p = 0;
    SetCurrentDirectory( szWorkPath );
    PyRun_SimpleString( "import os,sys" );
    PyRun_SimpleString( "sys.path.append( os.getcwd() )" );

    // A windowed (non-console) process has no standard streams, so Python
    // sets sys.stdout / sys.stderr to None. Modules that write to them while
    // being imported then raise an exception and PyImport_ImportModule()
    // returns NULL. Give such modules a sink to write to.
    PyRun_SimpleString( "if sys.stdout is None: sys.stdout = open( os.devnull, 'w' )" );
    PyRun_SimpleString( "if sys.stderr is None: sys.stderr = open( os.devnull, 'w' )" );
    // sys.argv does not exist in an embedded interpreter unless it is set
    // explicitly; some packages read it at import time.
    PyRun_SimpleString( "if not hasattr( sys, 'argv' ): sys.argv = [ '' ]" );
    //PyRun_SimpleString( "print( sys.path )" );
}
//析构函数
CPython::~CPython()
{
// Shut down the embedded interpreter started by Py_Initialize() in the constructor.
// NOTE(review): every CPython object therefore runs a full initialize/finalize
// cycle; extension modules may not work after the interpreter has been
// re-initialized - confirm that only one instance is created per process.
Py_Finalize();
}
//初始化 Numpy
int CPython::InitNumpy()
{
import_array();
return 0;
}
//C++调用 Python 的主函数,按照标准流程一步一步进行
bool CPython::PredictByPython( double * pddFeatures, int nFeatureNum, int nSampleNum, char * szPythonFilename,
char * szPredictFuncName, char * szModelPathAndBasicName, char * szFinalModelName,
double * pdPredicts, char * szErrorInfo, int nErrorInfoSize )
{
sprintf_s( szErrorInfo, nErrorInfoSize, "OK!" );
npy_intp npyDims[ 2 ] = { nSampleNum, nFeatureNum };
PyObject * pyArray = PyArray_SimpleNewFromData( 2, npyDims, NPY_DOUBLE, pddFeatures );
PyObject * pyArguments = PyTuple_New( 3 );
PyTuple_SetItem( pyArguments, 0, pyArray );
PyTuple_SetItem( pyArguments, 1, Py_BuildValue( "s", szModelPathAndBasicName ) );
PyTuple_SetItem( pyArguments, 2, Py_BuildValue( "s", szFinalModelName ) );
bool bIsCorrect = true;
PyObject * pyModule = PyImport_ImportModule( szPythonFilename );//当 MFC 运行到此时总是返回 NULL
PyObject * pyDict = NULL, * pyFunc = NULL, * pyReSult = NULL;
if( pyModule )
{
pyDict = PyModule_GetDict( pyModule );
if( pyDict )
{
pyFunc = PyDict_GetItemString( pyDict, szPredictFuncName );
if( pyFunc )
{
pyReSult = PyObject_CallObject( pyFunc, pyArguments );
if( pyReSult )
{
PyObject * pyErrorInfo = PyTuple_GetItem( pyReSult, 1 );
char * pszReturnedString = NULL, szReturnInfo[ 256 ] = "";
if( pyErrorInfo )
{
PyArg_Parse( pyErrorInfo, "s", & pszReturnedString );
strcpy_s( szReturnInfo, pszReturnedString );
Py_DECREF( pyErrorInfo );
}
else strcpy_s( szReturnInfo, "Can not get returned error info." );
PyObject * pyPredictResult = PyTuple_GetItem( pyReSult, 0 );
if( PyList_Check( pyPredictResult ) )
{
int nResultNum = PyList_Size( pyPredictResult );
if( nResultNum == nSampleNum )
{
for( int i=0; i<nResultNum; i++ )
{
PyObject * pyItem = PyList_GetItem( pyPredictResult, i );
double dTheItemResult = 0.0;
PyArg_Parse( pyItem, "d", & dTheItemResult );
pdPredicts[ i ] = dTheItemResult;
Py_DECREF( pyItem );
}
}
else
{
bIsCorrect = false;
sprintf_s( szErrorInfo, nErrorInfoSize, "Error! Returned predict num does not fit.%s", szReturnInfo );
}
}
else
{
bIsCorrect = false;
sprintf_s( szErrorInfo, nErrorInfoSize, "Error! Type of return is not correct.%s", szReturnInfo );
}
if( pyPredictResult ) Py_DECREF( pyPredictResult );
}
else
{
bIsCorrect = false;
sprintf_s( szErrorInfo, nErrorInfoSize, "Error! Can not get predict result." );
}
}
else
{
bIsCorrect = false;
sprintf_s( szErrorInfo, nErrorInfoSize, "Error! No such function name: %s.", szPredictFuncName );
}
}
else
{
bIsCorrect = false;
sprintf_s( szErrorInfo, nErrorInfoSize, "Error! No functions in module." );
}
}
else
{
bIsCorrect = false;
sprintf_s( szErrorInfo, nErrorInfoSize, "Error! No python file name: %s.", szPythonFilename );
}
if( pyReSult ) Py_DECREF( pyReSult );
if( pyFunc ) Py_DECREF( pyFunc );
if( pyDict ) Py_DECREF( pyDict );
if( pyModule ) Py_DECREF( pyModule );
Py_DECREF( pyArguments );
return bIsCorrect;
}
'''
用控制台程序的测试代码如下:
'''python
#include "..\CPython.h"
// Console test driver: predicts randomly generated samples through CPython.
// Returns 0 when the prediction succeeded and 1 otherwise.
int main()
{
    // Generate 100 random samples; 974 is the feature dimension and must not change.
    int nFeatureNum = 974, nSampleNum = 100;
    double * pddFeatures = new double[ nFeatureNum * nSampleNum ];
    CPython::GenarateRandomTestFeatures( pddFeatures, nFeatureNum, nSampleNum );
    double * pdPredicts = new double[ nSampleNum ];
    memset( pdPredicts, 0, sizeof(double) * nSampleNum );
    char szErrorInfo[ 256 ] = "";

    // Call Python.
    CPython cPython;
    bool bIsCorrect = cPython.PredictByPython( pddFeatures, nFeatureNum, nSampleNum, "Predict", "FinalPredict",
                                               "D:\\C++ Programs\\TryPython\\Models\\T1_GuDiFanZhuan", "LinearRegression",
                                               pdPredicts, szErrorInfo, sizeof(szErrorInfo) );

    // Print the result. The predictions are only meaningful after a
    // successful call, so they are skipped when the call failed.
    printf( "Return info: %s\n", szErrorInfo );
    if( bIsCorrect )
    {
        for( int i=0; i<nSampleNum; i++ )
        {
            printf( "%d %.6f\n", i, pdPredicts[ i ] );
        }
    }

    delete [] pddFeatures;
    delete [] pdPredicts;
    return bIsCorrect ? 0 : 1;
}
'''
使用带界面的 MFC 程序的测试代码如下:
'''python
void CTryPythonInMFCDlg::OnBnClickedGo()
{
// TODO: 在此添加控件通知处理程序代码
GetDlgItem( ID_GO )->EnableWindow( FALSE );
GetDlgItem( IDC_RUN_INFO )->SetWindowText( "Running..." );
//随机生成 100 个样本,974 是特征维度,不能变
int nFeatureNum = 974, nSampleNum = 100;
double * pddFeatures = new double[ nFeatureNum * nSampleNum ];
CPython::GenarateRandomTestFeatures( pddFeatures, nFeatureNum, nSampleNum );
double * pdPredicts = new double[ nSampleNum ];
memset( pdPredicts, 0, sizeof(double) * nSampleNum );
char szErrorInfo[ 256 ] = "";
//调用 Python
CPython cPython;
cPython.PredictByPython( pddFeatures, nFeatureNum, nSampleNum, "Predict", "FinalPredict",
"D:\\C++ Programs\\TryPython\\Models\\T1_GuDiFanZhuan", "LinearRegression",
pdPredicts, szErrorInfo, sizeof(szErrorInfo) );
//显示运行结果的信息
GetDlgItem( IDC_RUN_INFO )->SetWindowText( szErrorInfo );
delete [] pddFeatures;
delete [] pdPredicts;
GetDlgItem( ID_GO )->EnableWindow( TRUE );
}
'''
另外,如果把 Predict.py 换成如下的极简函数,则无论控制台还是界面程序都能正常运行:
'''python
def FinalPredict( Features, PathAndBasicName, FinalModel ):
    """Minimal stand-in for the real predictor: predict 0.5 for every sample.

    Features is only iterated to count the samples; PathAndBasicName and
    FinalModel are accepted for interface compatibility and not used.
    Returns ( Predicts, "OK!" ) with one 0.5 per item of Features.
    """
    Predicts = [ 0.5 for _ in Features ]
    return Predicts, "OK!"
'''
希望各位给指点指点吧!