witten
/
luminotes
Archived
1
0
Fork 0

Fixed a bug in which the character encoding of uploaded CSV files was not detected, so the file contents were not decoded correctly.

Note that this fix introduces a new dependency: the Python Universal Encoding Detector (http://chardet.feedparser.org/), found in the python-chardet package.
This commit is contained in:
Dan Helfman 2008-08-11 22:53:01 -07:00
parent 5ef02e5c0c
commit c6dbeffc10
4 changed files with 22 additions and 4 deletions

View File

@ -11,13 +11,14 @@ First, install the prerequisites:
* simplejson 1.3
* pytz 2006p
* Python Imaging Library 1.1
* Python Universal Encoding Detector 1.0
In Debian GNU/Linux, you can issue the following command to install these
packages:
apt-get install python2.4 python-cherrypy postgresql-8.1 \
postgresql-contrib-8.1 python-psycopg2 python-simplejson \
python-tz python-imaging
python-tz python-imaging python-chardet
database setup

6
NEWS
View File

@ -1,3 +1,9 @@
1.4.25: August 11, 2008:
* Fixed a bug in which the character encoding of uploaded CSV files was not
detected, so the file contents were not decoded correctly. Note that this fix
introduces a new dependency: the Python Universal Encoding Detector
(http://chardet.feedparser.org/), found in the python-chardet package.
1.4.24: August 11, 2008:
* Added a light gray line under note title text to make it clearer that it's
a title as opposed to just bold text.

View File

@ -8,6 +8,7 @@ import cherrypy
from PIL import Image
from cStringIO import StringIO
from threading import Lock, Event
from chardet.universaldetector import UniversalDetector
from Expose import expose
from Validate import validate, Valid_int, Valid_bool, Validation_error
from Database import Valid_id, end_transaction
@ -786,7 +787,7 @@ class Files( object ):
@return: rows of data from the parsed file. each row is a list of elements
@raise Parse_error: there was an error in parsing the given file
"""
APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 1024
APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 50
try:
import csv
@ -800,6 +801,15 @@ class Files( object ):
has_header = sniffer.has_header( sniff_sample )
# attempt to determine the file's character encoding
detector = UniversalDetector()
for line in lines:
detector.feed( line )
if detector.done: break
detector.close()
encoding = detector.result.get( "encoding" )
table_file.seek( 0 )
reader = csv.reader( table_file )
@ -820,7 +830,7 @@ class Files( object ):
else:
expected_row_length = current_row_length
yield row
yield [ element.decode( encoding ) for element in row ]
except ( csv.Error, IOError, TypeError ):
raise Parse_error()

View File

@ -2247,7 +2247,8 @@ Wiki.prototype.display_import_notebook = function ( result ) {
var div = createDOM( "div", {},
createDOM( "p", {}, "Almost done. I just need a little information about your file before I can complete the import and create a new notebook." ),
form
form,
createDOM( "p", {}, "Once you begin the import, it may take several seconds to complete." )
);
this.create_editor( "import", "<h3>import a notebook</h3>" + div.innerHTML, undefined, undefined, undefined, false, true, true, undefined );