diff --git a/INSTALL b/INSTALL index 94a92c0..fdd17d5 100644 --- a/INSTALL +++ b/INSTALL @@ -11,13 +11,14 @@ First, install the prerequisites: * simplejson 1.3 * pytz 2006p * Python Imaging Library 1.1 + * Python Universal Encoding Detector 1.0 In Debian GNU/Linux, you can issue the following command to install these packages: apt-get install python2.4 python-cherrypy postgresql-8.1 \ postgresql-contrib-8.1 python-psycopg2 python-simplejson \ - python-tz python-imaging + python-tz python-imaging python-chardet database setup diff --git a/NEWS b/NEWS index 66500c4..6962a70 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +1.4.25: August 11, 2008: + * Fixed a bug in which the character encoding of uploaded CSV files was not + properly detected and used to decode the file. Note that this fix + introduces a new dependency: http://chardet.feedparser.org/ found in the + python-chardet package. + 1.4.24: August 11, 2008: * Added a light gray line under note title text to make it clearer that it's a title as opposed to just bold text. diff --git a/controller/Files.py b/controller/Files.py index 52a850f..19f104b 100644 --- a/controller/Files.py +++ b/controller/Files.py @@ -8,6 +8,7 @@ import cherrypy from PIL import Image from cStringIO import StringIO from threading import Lock, Event +from chardet.universaldetector import UniversalDetector from Expose import expose from Validate import validate, Valid_int, Valid_bool, Validation_error from Database import Valid_id, end_transaction @@ -786,7 +787,7 @@ class Files( object ): @return: rows of data from the parsed file. each row is a list of elements @raise Parse_error: there was an error in parsing the given file """ - APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 1024 + APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 50 try: import csv @@ -800,6 +801,15 @@ class Files( object ): has_header = sniffer.has_header( sniff_sample ) + # attempt to determine the file's character encoding + detector = UniversalDetector() + for line in lines: + detector.feed( line ) + if detector.done: break + + detector.close() + encoding = detector.result.get( "encoding" ) + table_file.seek( 0 ) reader = csv.reader( table_file ) @@ -820,7 +830,7 @@ class Files( object ): else: expected_row_length = current_row_length - yield row + yield [ element.decode( encoding ) for element in row ] except ( csv.Error, IOError, TypeError ): raise Parse_error() diff --git a/static/js/Wiki.js b/static/js/Wiki.js index 9e130a4..63e3d17 100644 --- a/static/js/Wiki.js +++ b/static/js/Wiki.js @@ -2247,7 +2247,8 @@ Wiki.prototype.display_import_notebook = function ( result ) { var div = createDOM( "div", {}, createDOM( "p", {}, "Almost done. I just need a little information about your file before I can complete the import and create a new notebook." ), - form + form, + createDOM( "p", {}, "Once you begin the import, it may take several seconds to complete." ) ); this.create_editor( "import", "

import a notebook

" + div.innerHTML, undefined, undefined, undefined, false, true, true, undefined );