/* * pymigemo.c - C/Migemo Python binding * Copyright(C) 2005-2012, Atzm WATANABE * * $Id: pymigemo.c 227 2012-11-10 12:20:12Z atzm $ */ #include #include #include #include #include #include #include #include #include #include #include #define PYMIGEMO_VERSION "0.4" #if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 1) || PY_MAJOR_VERSION > 3 # define PYTHON3 #endif #ifndef Py_TYPE # define Py_TYPE(ob) (((PyObject *)(ob))->ob_type) #endif /* for dereference migemo object members */ struct _migemo { int enable; void *mtree; int charset; void *roma2hira; void *hira2kata; void *han2zen; void *zen2han; void *rx; void *addword; void *char2int; }; typedef struct { PyObject_HEAD migemo *migemo_obj; } Migemo; static bool get_encoding(unsigned char *encoding, size_t size, int charset) { unsigned char *enc; switch(charset) { case 1: enc = "cp932"; break; case 2: enc = "euc_jp"; break; case 3: enc = "utf_8"; break; default: enc = "ascii"; } if (strlen(enc) < size) { strcpy(encoding, enc); return true; } return false; } static unsigned char * trans_string(PyObject *pystr, const unsigned char *encoding) { unsigned char *str; if (PyUnicode_Check(pystr)) { PyObject *tmp = PyUnicode_AsEncodedString(pystr, encoding, "strict"); if (tmp == NULL) { return NULL; } str = strdup(PyBytes_AS_STRING(tmp)); Py_DECREF(tmp); if (str == NULL) { PyErr_NoMemory(); return NULL; } return str; } if (PyBytes_Check(pystr)) { if ((str = strdup(PyBytes_AS_STRING(pystr))) == NULL) { PyErr_NoMemory(); return NULL; } return str; } PyErr_SetString(PyExc_TypeError, "argument must be string"); return NULL; } static int isloadable(const unsigned char *path) { struct stat st; int ret = 0; int fd = open(path, O_RDONLY); if (fd < 0) { return errno; } if (fstat(fd, &st) < 0) { ret = errno; goto isloadable_end; } if (S_ISDIR(st.st_mode)) { ret = EISDIR; goto isloadable_end; } isloadable_end: if (close(fd) < 0) { ret = errno; } return ret; } static void Migemo_dealloc(Migemo *self) { if (self->migemo_obj) { migemo_close(self->migemo_obj); } Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject * Migemo_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { Migemo *self = (Migemo *)type->tp_alloc(type, 0); if (self != NULL) { self->migemo_obj = NULL; } return (PyObject *)self; } static int Migemo_init(Migemo *self, PyObject *args, PyObject *kwds) { migemo *migemo_obj; unsigned char *dictionary; static char *kwlist[] = {"dictionary", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &dictionary)) { return -1; } if (dictionary) { int ret = isloadable(dictionary); if (ret != 0) { PyErr_SetString(PyExc_OSError, strerror(ret)); return -1; } if (self->migemo_obj) { migemo_close(self->migemo_obj); } migemo_obj = migemo_open(dictionary); if (migemo_obj) { self->migemo_obj = migemo_obj; } else { PyErr_SetString(PyExc_AssertionError, "migemo_open() failed"); return -1; } } return 0; } static PyObject * Migemo_get_encoding(Migemo *self) { unsigned char encoding[7]; if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); return NULL; } return PyUnicode_FromString(encoding); } static PyObject * Migemo_query(Migemo *self, PyObject *args, PyObject *kwds) { PyObject *result, *pyquery, *pyrestr; unsigned char *query, *regex, encoding[7]; static char *kwlist[] = {"query", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &pyquery)) { return NULL; } if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); return NULL; } if ((query = trans_string(pyquery, encoding)) == NULL) { return NULL; } regex = migemo_query(self->migemo_obj, query); free(query); if (regex == NULL) { PyErr_SetString(PyExc_AssertionError, "migemo_query() failed"); return NULL; } pyrestr = PyBytes_FromString(regex); migemo_release(self->migemo_obj, regex); if (pyrestr == NULL) { return NULL; } result = PyUnicode_FromEncodedObject(pyrestr, encoding, "strict"); Py_DECREF(pyrestr); return result; } static PyObject * Migemo_set_operator(Migemo *self, PyObject *args, PyObject *kwds) { PyObject *result, *pyop; unsigned char *op, encoding[7]; int index; static char *kwlist[] = {"index", "op", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "iO", kwlist, &index, &pyop)) { return NULL; } if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); return NULL; } if ((op = trans_string(pyop, encoding)) == NULL) { return NULL; } result = PyBool_FromLong((long)migemo_set_operator(self->migemo_obj, index, op)); free(op); return result; } static PyObject * Migemo_get_operator(Migemo *self, PyObject *args, PyObject *kwds) { PyObject *result; const unsigned char *op; unsigned char encoding[7]; int index; static char *kwlist[] = {"index", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "i", kwlist, &index)) { return NULL; } if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); return NULL; } if (op = migemo_get_operator(self->migemo_obj, index)) { PyObject *tmp = PyBytes_FromString(op); if (tmp == NULL) { return NULL; } result = PyUnicode_FromEncodedObject(tmp, encoding, "strict"); Py_DECREF(tmp); return result; } PyErr_SetString(PyExc_ValueError, "invalid opindex"); return NULL; } static PyObject * Migemo_load(Migemo *self, PyObject *args, PyObject *kwds) { unsigned char *dict_file; int dict_id; static char *kwlist[] = {"dict_id", "dict_file", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &dict_id, &dict_file)) { return NULL; } if (dict_file) { int ret = isloadable(dict_file); if (ret != 0) { PyErr_SetString(PyExc_OSError, strerror(ret)); return NULL; } return PyLong_FromLong((long)migemo_load(self->migemo_obj, dict_id, dict_file)); } PyErr_SetString(PyExc_AssertionError, "dict_file is NULL"); return NULL; } static PyObject * Migemo_is_enable(Migemo *self) { return PyBool_FromLong((long)migemo_is_enable(self->migemo_obj)); } static PyMethodDef Migemo_methods[] = { {"query", (PyCFunction)Migemo_query, METH_VARARGS | METH_KEYWORDS, "return regex from string\n\ \n\ def query(query)\n\ query: string\n\ \n\ returns: regex string"}, {"set_operator", (PyCFunction)Migemo_set_operator, METH_VARARGS | METH_KEYWORDS, "set operator string as the meta character of regex\n\ \n\ def set_operator(index, op):\n\ index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ op: operator string\n\ \n\ returns: boolean value"}, {"get_operator", (PyCFunction)Migemo_get_operator, METH_VARARGS | METH_KEYWORDS, "get operator string as the meta character of regex\n\ \n\ def get_operator(index)\n\ index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ \n\ returns: operator string"}, {"load", (PyCFunction)Migemo_load, METH_VARARGS | METH_KEYWORDS, "add dictionary to Migemo object\n\ \n\ def load(dict_id, dict_file)\n\ dict_id: (DICTID_HAN2ZEN|DICTID_HIRA2KATA|DICTID_MIGEMO|\n\ DICTID_ROMA2HIRA|DICTID_ZEN2HAN)\n\ dict_file: path to dictionary file\n\ \n\ returns: ID of loaded dictionary"}, {"is_enable", (PyCFunction)Migemo_is_enable, METH_NOARGS, "check internal migemo_dict\n\ \n\ def is_enable()\n\ returns: boolean value"}, {"get_encoding", (PyCFunction)Migemo_get_encoding, METH_NOARGS, "get dictionary encoding\n\ \n\ def get_encoding()\n\ returns: encoding string"}, {NULL} /* Sentinel */ }; static PyMemberDef Migemo_members[] = { {NULL} /* Sentinel */ }; #ifndef PyVarObject_HEAD_INIT # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, #endif static PyTypeObject MigemoType = { PyVarObject_HEAD_INIT(NULL, 0) "migemo.Migemo", /*tp_name*/ sizeof(Migemo), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Migemo_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ "", /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ Migemo_methods, /* tp_methods */ Migemo_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ (initproc)Migemo_init, /* tp_init */ 0, /* tp_alloc */ Migemo_new, /* tp_new */ }; #define PYMIGEMO_MODULEDOC "C/Migemo Python binding" static PyMethodDef module_methods[] = { {NULL} /* Sentinel */ }; #ifdef PYTHON3 static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "migemo", /* m_name */ PYMIGEMO_MODULEDOC, /* m_doc */ -1, /* m_size */ module_methods, /* m_methods */ NULL, /* m_reload */ NULL, /* m_traverse */ NULL, /* m_clear */ NULL, /* m_free */ }; #endif #ifdef PYTHON3 # define MOD_INIT(name) PyObject *PyInit_##name(void) #else # define MOD_INIT(name) void init##name(void) #endif MOD_INIT(migemo) { PyObject* m; if (PyType_Ready(&MigemoType) < 0) return; #ifdef PYTHON3 m = PyModule_Create(&moduledef); #else m = Py_InitModule3("migemo", module_methods, PYMIGEMO_MODULEDOC); #endif if (m == NULL) { #ifdef PYTHON3 return NULL; #else return; #endif } Py_INCREF(&MigemoType); PyModule_AddObject(m, "Migemo", (PyObject *)&MigemoType); PyModule_AddObject(m, "PYMIGEMO_VERSION", Py_BuildValue("s", PYMIGEMO_VERSION)); PyModule_AddObject(m, "MIGEMO_VERSION", Py_BuildValue("s", MIGEMO_VERSION)); PyModule_AddObject(m, "DICTID_INVALID", Py_BuildValue("i", MIGEMO_DICTID_INVALID)); PyModule_AddObject(m, "DICTID_MIGEMO", Py_BuildValue("i", MIGEMO_DICTID_MIGEMO)); PyModule_AddObject(m, "DICTID_ROMA2HIRA", Py_BuildValue("i", MIGEMO_DICTID_ROMA2HIRA)); PyModule_AddObject(m, "DICTID_HIRA2KATA", Py_BuildValue("i", MIGEMO_DICTID_HIRA2KATA)); PyModule_AddObject(m, "DICTID_HAN2ZEN", Py_BuildValue("i", MIGEMO_DICTID_HAN2ZEN)); PyModule_AddObject(m, "DICTID_ZEN2HAN", Py_BuildValue("i", MIGEMO_DICTID_ZEN2HAN)); PyModule_AddObject(m, "OPINDEX_OR", Py_BuildValue("i", MIGEMO_OPINDEX_OR)); PyModule_AddObject(m, "OPINDEX_NEST_IN", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_IN)); PyModule_AddObject(m, "OPINDEX_NEST_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_OUT)); PyModule_AddObject(m, "OPINDEX_SELECT_IN", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_IN)); PyModule_AddObject(m, "OPINDEX_SELECT_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_OUT)); PyModule_AddObject(m, "OPINDEX_NEWLINE", Py_BuildValue("i", MIGEMO_OPINDEX_NEWLINE)); #ifdef PYTHON3 return m; #endif }