witten
/
luminotes
Archived
1
0
Fork 0

Fixed a bug in which the character encoding of uploaded CSV files was not properly detected and therefore not used to decode the file.

Note that this fix introduces a new dependency: the Python Universal Encoding Detector (http://chardet.feedparser.org/), which is available in the python-chardet package.
This commit is contained in:
Dan Helfman 2008-08-11 22:53:01 -07:00
parent 5ef02e5c0c
commit c6dbeffc10
4 changed files with 22 additions and 4 deletions

View File

@ -11,13 +11,14 @@ First, install the prerequisites:
* simplejson 1.3 * simplejson 1.3
* pytz 2006p * pytz 2006p
* Python Imaging Library 1.1 * Python Imaging Library 1.1
* Python Universal Encoding Detector 1.0
In Debian GNU/Linux, you can issue the following command to install these In Debian GNU/Linux, you can issue the following command to install these
packages: packages:
apt-get install python2.4 python-cherrypy postgresql-8.1 \ apt-get install python2.4 python-cherrypy postgresql-8.1 \
postgresql-contrib-8.1 python-psycopg2 python-simplejson \ postgresql-contrib-8.1 python-psycopg2 python-simplejson \
python-tz python-imaging python-tz python-imaging python-chardet
database setup database setup

6
NEWS
View File

@ -1,3 +1,9 @@
1.4.25: August 11, 2008:
* Fixed a bug in which the character encoding of uploaded CSV files was not
properly detected and used to decode the file. Note that this fix
introduces a new dependency: http://chardet.feedparser.org/ found in the
python-chardet package.
1.4.24: August 11, 2008: 1.4.24: August 11, 2008:
* Added a light gray line under note title text to make it clearer that it's * Added a light gray line under note title text to make it clearer that it's
a title as opposed to just bold text. a title as opposed to just bold text.

View File

@ -8,6 +8,7 @@ import cherrypy
from PIL import Image from PIL import Image
from cStringIO import StringIO from cStringIO import StringIO
from threading import Lock, Event from threading import Lock, Event
from chardet.universaldetector import UniversalDetector
from Expose import expose from Expose import expose
from Validate import validate, Valid_int, Valid_bool, Validation_error from Validate import validate, Valid_int, Valid_bool, Validation_error
from Database import Valid_id, end_transaction from Database import Valid_id, end_transaction
@ -786,7 +787,7 @@ class Files( object ):
@return: rows of data from the parsed file. each row is a list of elements @return: rows of data from the parsed file. each row is a list of elements
@raise Parse_error: there was an error in parsing the given file @raise Parse_error: there was an error in parsing the given file
""" """
APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 1024 APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 50
try: try:
import csv import csv
@ -800,6 +801,15 @@ class Files( object ):
has_header = sniffer.has_header( sniff_sample ) has_header = sniffer.has_header( sniff_sample )
# attempt to determine the file's character encoding
detector = UniversalDetector()
for line in lines:
detector.feed( line )
if detector.done: break
detector.close()
encoding = detector.result.get( "encoding" )
table_file.seek( 0 ) table_file.seek( 0 )
reader = csv.reader( table_file ) reader = csv.reader( table_file )
@ -820,7 +830,7 @@ class Files( object ):
else: else:
expected_row_length = current_row_length expected_row_length = current_row_length
yield row yield [ element.decode( encoding ) for element in row ]
except ( csv.Error, IOError, TypeError ): except ( csv.Error, IOError, TypeError ):
raise Parse_error() raise Parse_error()

View File

@ -2247,7 +2247,8 @@ Wiki.prototype.display_import_notebook = function ( result ) {
var div = createDOM( "div", {}, var div = createDOM( "div", {},
createDOM( "p", {}, "Almost done. I just need a little information about your file before I can complete the import and create a new notebook." ), createDOM( "p", {}, "Almost done. I just need a little information about your file before I can complete the import and create a new notebook." ),
form form,
createDOM( "p", {}, "Once you begin the import, it may take several seconds to complete." )
); );
this.create_editor( "import", "<h3>import a notebook</h3>" + div.innerHTML, undefined, undefined, undefined, false, true, true, undefined ); this.create_editor( "import", "<h3>import a notebook</h3>" + div.innerHTML, undefined, undefined, undefined, false, true, true, undefined );