regluit/notebooks/PR_644_Handle_control_chara...

136 lines
2.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"[Handle control characters by eshellman · Pull Request #644 · Gluejar/regluit](https://github.com/Gluejar/regluit/pull/644)\n",
"\n",
"* <https://github.com/Gluejar/regluit/pull/644/files>\n",
"* branch: `no-control-characters`\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from itertools import islice\n",
"\n",
"from regluit.core.models import Work\n",
"from regluit.utils import text\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def is_clean(u):\n",
"    \"\"\"Tell whether `u` comes back unchanged from text.remove_badxml.\"\"\"\n",
"    scrubbed = text.remove_badxml(u)\n",
"    return u == scrubbed\n",
"\n",
"# Sanity check: the backspace character (code point 0x08) must be reported as not clean.\n",
"assert not is_clean(unichr(0x08))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def dirty_works():\n",
"    \"\"\"Lazily yield each Work whose title fails the is_clean() check.\"\"\"\n",
"    for work in Work.objects.all():\n",
"        # Only works whose title is not clean are passed on to the caller.\n",
"        if not is_clean(work.title):\n",
"            yield work"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Take the first work produced by dirty_works(); next() raises StopIteration if it yields nothing.\n",
"work = next(dirty_works())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"u\"HBR's 10 Must Reads on Managing People (with featured article \\x93Leadership That Gets Results,\\x94 by Daniel Goleman)\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Title of the selected work before cleaning; the recorded output shows \\x93 and \\x94 in it.\n",
"work.title"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"u\"HBR's 10 Must Reads on Managing People (with featured article Leadership That Gets Results, by Daniel Goleman)\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The same title passed through text.remove_badxml; the recorded output no longer has \\x93 or \\x94.\n",
"text.remove_badxml(work.title)"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}