Commit 3a979658 authored by Maximilian Köhl
Browse files

initial commit

parents
build
dist
.venv
.tox
.coverage
MANIFEST
docs/_build
*.egg-info
*.pyd
.vscode/*
!.vscode/settings.windows.template.json
!.vscode/tasks.json
.hypothesis
.mypy_cache
.pytest_cache
pip-wheel-metadata
__pycache__
playground
Python Unicode Buffer
=====================
from distutils.core import Extension
def build(setup_kwargs):
    """Add the ``unibuf._unibuf`` C extension to the setup keyword arguments.

    This function is mandatory in order to build the extensions: it mutates
    ``setup_kwargs`` in place, setting ``ext_modules`` to a single extension
    compiled from ``unibuf/_unibuf.c``. Nothing is returned.
    """
    unibuf_extension = Extension("unibuf._unibuf", ["unibuf/_unibuf.c"])
    setup_kwargs["ext_modules"] = [unibuf_extension]
This diff is collapsed.
[virtualenvs]
in-project = true
[tool.poetry]
name = "unibuf"
version = "0.1.0"
description = "An implementation of the buffer protocol for unicode strings."
authors = [
"Maximilian Köhl <mkoehl@cs.uni-saarland.de>"
]
license = "MIT"
readme = "README.rst"
repository = "https://dgit.cs.uni-saarland.de/koehlma/python-unicode-buffer"
classifiers = [
"License :: OSI Approved :: MIT License",
"Development Status :: 4 - Beta",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: 3.8",
"Operating System :: OS Independent"
]
build = "build.py"
[tool.poetry.dependencies]
python = "^3.8"
hypothesis = "^5.5.4"
pytest = "^5.3.5"
[tool.poetry.dev-dependencies]
black = { version = "^19.10b0", allow-prereleases = true }
flake8 = "^3.7.9"
flake8-bugbear = "^20.1.2"
pep8-naming = "^0.9.1"
mypy = "^0.761"
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"
[flake8]
max-line-length = 99
extend-ignore = E203,F811,E231
[mypy]
disallow_untyped_calls = True
disallow_untyped_defs = True
disallow_incomplete_defs = True
check_untyped_defs = True
disallow_subclassing_any = True
warn_no_return = True
strict_optional = True
strict_equality = True
no_implicit_optional = True
disallow_any_generics = True
disallow_any_unimported = True
warn_redundant_casts = True
warn_unused_ignores = True
warn_unused_configs = True
show_traceback = True
show_error_codes = True
pretty = True
# -*- coding:utf-8 -*-
#
# Copyright (C) 2020, Maximilian Köhl <mkoehl@cs.uni-saarland.de>
from __future__ import annotations
# -*- coding:utf-8 -*-
#
# Copyright (C) 2020, Maximilian Köhl <mkoehl@cs.uni-saarland.de>
from __future__ import annotations
from hypothesis import given, settings, strategies as st
from unibuf import UnicodeBuffer
@given(st.text())
@settings(max_examples=2000)
def test_unicode_buffer(text: str) -> None:
    """Check that the exported bytes round-trip through the reported encoding."""
    wrapped = UnicodeBuffer(text)
    # Encoding direction: the raw buffer must equal the text encoded with the
    # codec that the buffer itself reports.
    exported = bytes(wrapped)
    expected = text.encode(wrapped.encoding)
    assert exported == expected
    # Decoding direction: a fresh export must decode back to the input text.
    decoded = bytes(wrapped).decode(wrapped.encoding)
    assert decoded == text
# -*- coding:utf-8 -*-
#
# Copyright (C) 2020, Maximilian Köhl <mkoehl@cs.uni-saarland.de>
from __future__ import annotations
from ._unibuf import UnicodeBuffer
#include <Python.h>
#include "structmember.h"
/*
 * Instance layout of `unibuf.UnicodeBuffer`.
 *
 * Wraps a Python string object and caches the metadata needed to expose the
 * string's storage through the buffer protocol.
 */
typedef struct {
    PyObject_HEAD
    /* The wrapped string object; released by the deallocator. */
    PyObject *string;
    /* Codec name matching the storage layout; exposed as the `encoding` member. */
    const char *string_encoding;
    /* Value of PyUnicode_KIND() for the wrapped string. */
    int string_kind;
    /* Length reported by PyUnicode_GET_LENGTH(); also used as the 1-D buffer shape. */
    Py_ssize_t string_length;
    /*
     * Bytes per element. Declared as Py_ssize_t (rather than size_t) because
     * the getbuffer implementation stores this field's address in
     * Py_buffer.strides, which is a `Py_ssize_t *`.
     */
    Py_ssize_t string_itemsize;
} PyUnicodeBuffer;
/*
 * Codec names for the three element widths. The two- and four-byte variants
 * depend on byte order, selected through WORDS_BIGENDIAN (presumably supplied
 * by the Python build configuration headers; an undefined macro evaluates to 0
 * in `#if`, which selects the little-endian names).
 */
#define UCS1_ENCODING_NAME "ISO-8859-1"
#if WORDS_BIGENDIAN
#define UCS2_ENCODING_NAME "UTF-16BE"
#define UCS4_ENCODING_NAME "UTF-32BE"
#else
#define UCS2_ENCODING_NAME "UTF-16LE"
#define UCS4_ENCODING_NAME "UTF-32LE"
#endif

/*
 * Map an element width in bytes (1, 2 or 4) to the matching codec name.
 * Any other width yields the literal "UNKNOWN".
 */
static const char *encoding_name_from_itemsize(size_t itemsize) {
    if (itemsize == 1) {
        return UCS1_ENCODING_NAME;
    }
    if (itemsize == 2) {
        return UCS2_ENCODING_NAME;
    }
    if (itemsize == 4) {
        return UCS4_ENCODING_NAME;
    }
    return "UNKNOWN";
}
/*
 * tp_new slot: create a UnicodeBuffer wrapping a Python string.
 *
 * Accepts exactly one argument, `string` (positional or keyword), which must
 * be a str instance.
 *
 * Returns a new reference to the instance, or NULL with an exception set:
 *   - TypeError if `string` is not a str (or the arguments do not parse),
 *   - ValueError if the string reports an unexpected storage kind,
 *   - whatever PyUnicode_READY() or the type's allocator raised.
 */
static PyObject *PyUnicodeBuffer_new(
    PyTypeObject *type,
    PyObject *args,
    PyObject *kwargs
) {
    static char *keywords[] = {"string", NULL};
    PyObject *string = NULL;
    PyUnicodeBuffer *self;
    size_t itemsize;

    /*
     * Validate everything before allocating: the failure paths then have
     * nothing to release, and the deallocator never sees a half-initialised
     * instance holding a borrowed `string` pointer.
     */
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O:UnicodeBuffer", keywords, &string)) {
        return NULL;
    }
    if (!PyUnicode_Check(string)) {
        PyErr_Format(
            PyExc_TypeError,
            "UnicodeBuffer() argument must be a string, not '%.200s'",
            Py_TYPE(string)->tp_name
        );
        return NULL;
    }
    if (PyUnicode_READY(string)) {
        return NULL;
    }

    /*
     * Once PyUnicode_READY() has succeeded the string is in its canonical
     * representation, so only the 1-, 2- and 4-byte kinds can occur; anything
     * else is reported as an error.
     */
    switch (PyUnicode_KIND(string)) {
        case PyUnicode_1BYTE_KIND:
            itemsize = 1;
            break;
        case PyUnicode_2BYTE_KIND:
            itemsize = 2;
            break;
        case PyUnicode_4BYTE_KIND:
            itemsize = 4;
            break;
        default:
            PyErr_Format(
                PyExc_ValueError,
                "invalid PyUnicode_KIND %i",
                (int) PyUnicode_KIND(string)
            );
            return NULL;
    }

    self = (PyUnicodeBuffer *) type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    /* Take the reference at the moment the pointer is stored. */
    Py_INCREF(string);
    self->string = string;
    self->string_kind = (int) PyUnicode_KIND(string);
    self->string_length = PyUnicode_GET_LENGTH(string);
    self->string_itemsize = itemsize;
    self->string_encoding = encoding_name_from_itemsize(itemsize);
    return (PyObject *) self;
}
/*
 * tp_dealloc slot: release the reference to the wrapped string (if any) and
 * hand the instance memory back through the type's tp_free.
 */
static void PyUnicodeBuffer_dealloc(PyUnicodeBuffer *self) {
    freefunc release = Py_TYPE(self)->tp_free;

    Py_XDECREF(self->string);
    release((PyObject *) self);
}
/*
 * tp_repr / tp_str slot: render the instance as
 * `<UnicodeBuffer <repr of the string> encoding='<codec name>'>`.
 * Returns a new string object, or NULL if formatting failed.
 */
static PyObject *PyUnicodeBuffer_str(PyUnicodeBuffer *self) {
    PyObject *wrapped = self->string;
    const char *codec = self->string_encoding;

    return PyUnicode_FromFormat("<UnicodeBuffer %R encoding='%s'>", wrapped, codec);
}
static int PyUnicodeBuffer_getbuffer(PyUnicodeBuffer *self, Py_buffer *view, int flags) {
if (view == NULL) {
PyErr_SetString(PyExc_ValueError, "NULL view in getbuffer");
return -1;
}
view->obj = (PyObject*) self;
view->buf = PyUnicode_DATA(self->string);
view->len = self->string_length * self->string_itemsize;
view->readonly = 1;
view->itemsize = self->string_itemsize;
view->format = "i";
view->ndim = 1;
view->shape = &self->string_length;
view->strides = &self->string_itemsize;
view->suboffsets = NULL;
view->internal = NULL;
Py_INCREF(self);
return 0;
}
static PyBufferProcs PyUnicodeBuffer_as_buffer = {
(getbufferproc) PyUnicodeBuffer_getbuffer,
(releasebufferproc) NULL,
};
static PyMemberDef PyUnicodeBuffer_members[] = {
{"encoding", T_STRING, offsetof(PyUnicodeBuffer, string_encoding), 0, "the underlying encoding"},
{NULL}
};
/* Type object for `unibuf.UnicodeBuffer`. */
static PyTypeObject py_unicode_buffer_type = {
    PyVarObject_HEAD_INIT(NULL, 0)

    /* Identity and layout. */
    .tp_name = "unibuf.UnicodeBuffer",
    .tp_basicsize = sizeof(PyUnicodeBuffer),
    .tp_itemsize = 0,
    .tp_flags = Py_TPFLAGS_DEFAULT,
    .tp_doc = "Buffer protocol for unicode objects.",

    /* Lifecycle. */
    .tp_new = PyUnicodeBuffer_new,
    .tp_dealloc = (destructor) PyUnicodeBuffer_dealloc,

    /* repr() and str() share one implementation. */
    .tp_repr = (reprfunc) PyUnicodeBuffer_str,
    .tp_str = (reprfunc) PyUnicodeBuffer_str,

    /* Protocols and attributes. */
    .tp_as_buffer = &PyUnicodeBuffer_as_buffer,
    .tp_members = PyUnicodeBuffer_members,
};
/*
 * Definition of the `_unibuf` extension module. It declares no module-level
 * functions; the UnicodeBuffer type is attached in the init function.
 */
static PyModuleDef py_unibuf_module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_unibuf",
    .m_doc = "A buffer protocol for unicode objects.",
    .m_size = -1,  /* no per-module state structure is allocated */
};
PyMODINIT_FUNC PyInit__unibuf(void) {
PyObject *module;
if (PyType_Ready(&py_unicode_buffer_type) < 0) {
return NULL;
}
module = PyModule_Create(&py_unibuf_module);
if (module == NULL) {
return NULL;
}
Py_INCREF(&py_unicode_buffer_type);
if (PyModule_AddObject(module, "UnicodeBuffer", (PyObject *) &py_unicode_buffer_type) < 0) {
Py_DECREF(&py_unicode_buffer_type);
Py_DECREF(module);
return NULL;
}
return module;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment