Fixed a bug in which the character encoding of uploaded CSV files was not properly detected and used to decode the file.
Note that this fix introduces a new dependency: the Python Universal Encoding Detector (chardet, http://chardet.feedparser.org/), available on Debian GNU/Linux as the python-chardet package.
This commit is contained in:
parent
5ef02e5c0c
commit
c6dbeffc10
3
INSTALL
3
INSTALL
|
@ -11,13 +11,14 @@ First, install the prerequisites:
|
|||
* simplejson 1.3
|
||||
* pytz 2006p
|
||||
* Python Imaging Library 1.1
|
||||
* Python Universal Encoding Detector 1.0
|
||||
|
||||
In Debian GNU/Linux, you can issue the following command to install these
|
||||
packages:
|
||||
|
||||
apt-get install python2.4 python-cherrypy postgresql-8.1 \
|
||||
postgresql-contrib-8.1 python-psycopg2 python-simplejson \
|
||||
python-tz python-imaging
|
||||
python-tz python-imaging python-chardet
|
||||
|
||||
|
||||
database setup
|
||||
|
|
6
NEWS
6
NEWS
|
@ -1,3 +1,9 @@
|
|||
1.4.25: August 11, 2008:
|
||||
* Fixed a bug in which the character encoding of uploaded CSV files was not
|
||||
properly detected and used to decode the file. Note that this fix
|
||||
introduces a new dependency: http://chardet.feedparser.org/ found in the
|
||||
python-chardet package.
|
||||
|
||||
1.4.24: August 11, 2008:
|
||||
* Added a light gray line under note title text to make it clearer that it's
|
||||
a title as opposed to just bold text.
|
||||
|
|
|
@ -8,6 +8,7 @@ import cherrypy
|
|||
from PIL import Image
|
||||
from cStringIO import StringIO
|
||||
from threading import Lock, Event
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
from Expose import expose
|
||||
from Validate import validate, Valid_int, Valid_bool, Validation_error
|
||||
from Database import Valid_id, end_transaction
|
||||
|
@ -786,7 +787,7 @@ class Files( object ):
|
|||
@return: rows of data from the parsed file. each row is a list of elements
|
||||
@raise Parse_error: there was an error in parsing the given file
|
||||
"""
|
||||
APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 1024
|
||||
APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 50
|
||||
|
||||
try:
|
||||
import csv
|
||||
|
@ -800,6 +801,15 @@ class Files( object ):
|
|||
|
||||
has_header = sniffer.has_header( sniff_sample )
|
||||
|
||||
# attempt to determine the file's character encoding
|
||||
detector = UniversalDetector()
|
||||
for line in lines:
|
||||
detector.feed( line )
|
||||
if detector.done: break
|
||||
|
||||
detector.close()
|
||||
encoding = detector.result.get( "encoding" )
|
||||
|
||||
table_file.seek( 0 )
|
||||
reader = csv.reader( table_file )
|
||||
|
||||
|
@ -820,7 +830,7 @@ class Files( object ):
|
|||
else:
|
||||
expected_row_length = current_row_length
|
||||
|
||||
yield row
|
||||
yield [ element.decode( encoding ) for element in row ]
|
||||
except ( csv.Error, IOError, TypeError ):
|
||||
raise Parse_error()
|
||||
|
||||
|
|
|
@ -2247,7 +2247,8 @@ Wiki.prototype.display_import_notebook = function ( result ) {
|
|||
|
||||
var div = createDOM( "div", {},
|
||||
createDOM( "p", {}, "Almost done. I just need a little information about your file before I can complete the import and create a new notebook." ),
|
||||
form
|
||||
form,
|
||||
createDOM( "p", {}, "Once you begin the import, it may take several seconds to complete." )
|
||||
);
|
||||
|
||||
this.create_editor( "import", "<h3>import a notebook</h3>" + div.innerHTML, undefined, undefined, undefined, false, true, true, undefined );
|
||||
|
|
Reference in New Issue