Personal wiki notebook (not under development)

Files.py 32KB


  1. import os
  2. import re
  3. import sys
  4. import cgi
  5. import time
  6. import urllib
  7. import os.path
  8. import httplib
  9. import tempfile
  10. import cherrypy
  11. from PIL import Image
  12. from cStringIO import StringIO
  13. from threading import Lock
  14. from chardet.universaldetector import UniversalDetector
  15. from Expose import expose
  16. from Validate import validate, Valid_int, Valid_bool, Validation_error
  17. from Database import Valid_id, end_transaction
  18. from Users import grab_user_id, Access_error
  19. from Expire import strongly_expire, weakly_expire
  20. from model.File import File
  21. from model.User import User
  22. from model.Notebook import Notebook
  23. from model.Download_access import Download_access
  24. from view.Blank_page import Blank_page
  25. from view.Json import Json
  26. from view.Progress_bar import quota_error_script, general_error_script
  27. from view.File_preview_page import File_preview_page
  28. class Upload_error( Exception ):
  29. def __init__( self, message = None ):
  30. if message is None:
  31. message = u"An error occurred when uploading the file."
  32. Exception.__init__( self, message )
  33. self.__message = message
  34. def to_dict( self ):
  35. return dict(
  36. error = self.__message
  37. )
  38. class Parse_error( Exception ):
  39. def __init__( self, message = None ):
  40. if message is None:
  41. message = u"Sorry, I can't figure out how to read that file. Please try a different file, or contact support for help."
  42. Exception.__init__( self, message )
  43. self.__message = message
  44. def to_dict( self ):
  45. return dict(
  46. error = self.__message
  47. )
# map of upload id to Upload_file for uploads that are currently being received. entries are added
# by FieldStorage.make_file() and removed by Files.upload()
current_uploads = {}
# acquired around the insertion into and removal from current_uploads. note that
# FieldStorage.make_file() and Files.progress() also read current_uploads without holding it
current_uploads_lock = Lock()
  51. def make_files_dir():
  52. if sys.platform.startswith( "win" ):
  53. files_dir = os.path.join( os.environ.get( "APPDATA" ), "Luminotes", "files" )
  54. else:
  55. files_dir = os.path.join( os.environ.get( "HOME", "" ), ".luminotes", "files" )
  56. if not os.path.exists( files_dir ):
  57. import stat
  58. os.makedirs( files_dir, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR )
  59. return files_dir
# directory where uploaded file data is stored. determined (and created if necessary) once, when
# this module is imported
files_dir = make_files_dir()
  61. class Upload_file( object ):
  62. """
  63. File-like object for storing file uploads.
  64. """
  65. def __init__( self, file_id, filename, content_length ):
  66. self.__file = self.open_file( file_id, "w+" )
  67. self.__file_id = file_id
  68. self.__filename = filename
  69. self.__content_length = content_length
  70. self.__file_received_bytes = 0
  71. self.__total_received_bytes = cherrypy.request.rfile.bytes_read
  72. def write( self, data ):
  73. self.__file.write( data )
  74. self.__file_received_bytes += len( data )
  75. self.__total_received_bytes = cherrypy.request.rfile.bytes_read
  76. def tell( self ):
  77. return self.__file.tell()
  78. def seek( self, position ):
  79. self.__file.seek( position )
  80. def read( self, size = None ):
  81. if size is None:
  82. return self.__file.read()
  83. return self.__file.read( size )
  84. def close( self ):
  85. self.__file.close()
  86. def delete( self ):
  87. self.__file.close()
  88. self.delete_file( self.__file_id )
  89. @staticmethod
  90. def make_server_filename( file_id ):
  91. global files_dir
  92. return os.path.join( files_dir, u"%s" % file_id )
  93. @staticmethod
  94. def open_file( file_id, mode = None ):
  95. # force binary mode
  96. if not mode:
  97. mode = "rb"
  98. elif "b" not in mode:
  99. mode = "%sb" % mode
  100. return file( Upload_file.make_server_filename( file_id ), mode )
  101. @staticmethod
  102. def open_image( file_id ):
  103. return Image.open( Upload_file.make_server_filename( file_id ) )
  104. @staticmethod
  105. def delete_file( file_id ):
  106. try:
  107. return os.remove( Upload_file.make_server_filename( file_id ) )
  108. except OSError:
  109. pass
  110. filename = property( lambda self: self.__filename )
  111. # expected byte count of the entire form upload, including the file and other form parameters
  112. content_length = property( lambda self: self.__content_length )
  113. # count of bytes received thus far for this file upload only
  114. file_received_bytes = property( lambda self: self.__file_received_bytes )
  115. # count of bytes received thus far for the form upload, including the file and other form
  116. # parameters
  117. total_received_bytes = property( lambda self: self.__total_received_bytes )
class FieldStorage( cherrypy._cpcgifs.FieldStorage ):
  """
  Derived from cherrypy._cpcgifs.FieldStorage, which is in turn derived from cgi.FieldStorage, which
  calls make_file() to create a temporary file where file uploads are stored. By wrapping this file
  object, we can track its progress as it's written. Inspired by:
  http://www.cherrypy.org/attachment/ticket/546/uploadfilter.py
  """
  def make_file( self, binary = None ):
    """
    Create the Upload_file that an incoming file upload is written to, and register it in
    current_uploads under its file id so that its progress can be reported.

    This method relies on a file_id parameter being present in the HTTP query string, under the
    name X-Progress-ID.

    @type binary: NoneType
    @param binary: ignored
    @rtype: Upload_file
    @return: wrapped temporary file used to store the upload
    @raise Upload_error: the provided file_id value is invalid, the filename is missing, or the
                         Content-Length header value is invalid
    """
    global current_uploads, current_uploads_lock
    cherrypy.response.timeout = 3600 * 2 # increase upload timeout to 2 hours (default is 5 min)
    # NOTE(review): this is assigned on cherrypy.server rather than on the current request, so it
    # presumably isn't limited to this upload -- confirm
    cherrypy.server.socket_timeout = 60 # increase socket timeout to one minute (default is 10 sec)
    DASHES_AND_NEWLINES = 6 # four dashes and two newlines
    # pluck the file id out of the query string. it would be preferable to grab it out of parsed
    # form variables instead, but at this point in the processing, all the form variables might not
    # be parsed
    file_id = cgi.parse_qs( cherrypy.request.query_string ).get( u"X-Progress-ID", [ None ] )[ 0 ]
    try:
      file_id = Valid_id()( file_id )
    except ValueError:
      raise Upload_error( "The file_id is invalid." )
    # strip any client-side directory components (using either kind of slash) from the filename
    self.filename = unicode( self.filename.split( "/" )[ -1 ].split( "\\" )[ -1 ].strip(), "utf8" )
    if not self.filename:
      raise Upload_error( "Please provide a filename." )
    content_length = cherrypy.request.headers.get( "content-length", 0 )
    try:
      content_length = Valid_int( min = 0 )( content_length ) - len( self.outerboundary ) - DASHES_AND_NEWLINES
    except ValueError:
      raise Upload_error( "The Content-Length header value is invalid." )
    # file size is the entire content length of the POST, minus the size of the other form
    # parameters and boundaries. note: this assumes that the uploaded file is sent as the last
    # form parameter in the POST
    # if an upload with the same id is already registered, close its file before replacing it.
    # NOTE(review): this lookup happens without holding current_uploads_lock -- confirm that's okay
    existing_file = current_uploads.get( file_id )
    if existing_file:
      existing_file.close()
    upload_file = Upload_file( file_id, self.filename, content_length )
    current_uploads_lock.acquire()
    try:
      current_uploads[ file_id ] = upload_file
    finally:
      current_uploads_lock.release()
    return upload_file
  def __write( self, line ):
    """
    This implementation of __write() is different than that of the base class, because it calls
    make_file() whenever there is a filename instead of only for large enough files.

    NOTE(review): because this class is also named FieldStorage, the private names __write and
    __file are mangled to _FieldStorage__write and _FieldStorage__file. Presumably that is what
    lets this method replace cgi.FieldStorage's own private __write() and share its __file
    buffer -- confirm against the cgi module before renaming this class.

    @type line: str
    @param line: chunk of upload data to write
    """
    # on the first write for a field with a filename, switch from the initial buffer to an
    # Upload_file, carrying over whatever has been buffered so far
    if self.__file is not None and self.filename:
      self.file = self.make_file( "" )
      self.file.write( self.__file.getvalue() )
      self.__file = None
    self.file.write( line )
# replace cherrypy's FieldStorage class with the upload-tracking subclass above. presumably cherrypy
# looks the class up on the _cpcgifs module when it parses a request body, so that from this point
# on uploads go through FieldStorage.make_file() -- confirm against the cherrypy version in use
cherrypy._cpcgifs.FieldStorage = FieldStorage
  177. class Files( object ):
  178. FILE_LINK_PATTERN = re.compile( u'<a\s+href="[^"]*/files/download\?file_id=([^"&]+)(&[^"]*)?"[^>]*>(<img )?[^<]+</a>', re.IGNORECASE )
  179. """
  180. Controller for dealing with uploaded files, corresponding to the "/files" URL.
  181. """
  182. def __init__( self, database, users, download_products, web_server ):
  183. """
  184. Create a new Files object.
  185. @type database: controller.Database
  186. @param database: database that file metadata is stored in
  187. @type users: controller.Users
  188. @param users: controller for all users
  189. @type download_products: [ { "name": unicode, ... } ]
  190. @param download_products: list of configured downloadable products
  191. @type web_server: unicode
  192. @param web_server: front-end web server (determines specific support for various features)
  193. @rtype: Files
  194. @return: newly constructed Files
  195. """
  196. self.__database = database
  197. self.__users = users
  198. self.__download_products = download_products
  199. self.__web_server = web_server
  @expose()
  @weakly_expire
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    quote_filename = Valid_bool( none_okay = True ),
    preview = Valid_bool( none_okay = True ),
    user_id = Valid_id( none_okay = True ),
  )
  def download( self, file_id, quote_filename = False, preview = True, user_id = None ):
    """
    Return the contents of file that a user has previously uploaded.

    @type file_id: unicode
    @param file_id: id of the file to download
    @type quote_filename: bool
    @param quote_filename: True to URL quote the filename of the downloaded file, False to leave it
                           as UTF-8. IE expects quoting while Firefox doesn't (optional, defaults
                           to False)
    @type preview: bool
    @param preview: True to redirect to a preview page if the file is a valid image, False to
                    unconditionally initiate a download
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: generator, dict, or str
    @return: file data, or { 'redirect': preview page url } if the file is being previewed, or an
             empty string if the web server is nginx (which is handed the file via an
             X-Accel-Redirect header instead)
    @raise Access_error: the current user doesn't have access to the notebook that the file is in
    """
    db_file = self.__database.load( File, file_id )

    if not db_file or not self.__users.load_notebook( user_id, db_file.notebook_id ):
      raise Access_error()

    # if the file is openable as an image, then allow the user to view it instead of downloading it
    if preview:
      try:
        Upload_file.open_image( file_id )
        return dict( redirect = u"/files/preview?file_id=%s&quote_filename=%s" % ( file_id, quote_filename ) )
      except IOError:
        pass

    cherrypy.response.headerMap[ u"Content-Type" ] = db_file.content_type

    filename = db_file.filename.replace( '"', r"\"" ).encode( "utf8" )
    if quote_filename:
      filename = urllib.quote( filename, safe = "" )

    cherrypy.response.headerMap[ u"Content-Disposition" ] = 'attachment; filename="%s"' % filename
    cherrypy.response.headerMap[ u"Content-Length" ] = db_file.size_bytes

    if self.__web_server == u"nginx":
      cherrypy.response.headerMap[ u"X-Accel-Redirect" ] = "/download/%s" % file_id
      return ""

    def stream():
      CHUNK_SIZE = 8192
      local_file = Upload_file.open_file( file_id )

      # close the file whether the download completes, fails, or is abandoned part-way through
      try:
        local_file.seek( 0 )

        while True:
          data = local_file.read( CHUNK_SIZE )
          if len( data ) == 0: break
          yield data
      finally:
        local_file.close()

    return stream()
  @expose()
  @weakly_expire
  @end_transaction
  @validate(
    access_id = Valid_id(),
  )
  def download_product( self, access_id ):
    """
    Return the contents of downloadable product file.

    @type access_id: unicode
    @param access_id: id of download access object that grants access to the file
    @rtype: generator or str
    @return: file data, or an empty string if the web server is nginx (which is handed the file via
             an X-Accel-Redirect header instead)
    @raise Access_error: the access_id is unknown or doesn't grant access to the file, or the
                         product's file doesn't exist
    """
    # load the download_access object corresponding to the given id
    download_access = self.__database.load( Download_access, access_id )

    if download_access is None:
      raise Access_error()

    # find the product corresponding to the item_number
    products = [
      product for product in self.__download_products
      if unicode( download_access.item_number ) == product.get( u"item_number" )
    ]
    if not products:
      raise Access_error()

    product = products[ 0 ]

    public_filename = product[ u"filename" ].encode( "utf8" )
    local_filename = u"products/%s" % product[ u"filename" ]

    if not os.path.exists( local_filename ):
      raise Access_error()

    cherrypy.response.headerMap[ u"Content-Type" ] = u"application/octet-stream"
    cherrypy.response.headerMap[ u"Content-Disposition" ] = 'attachment; filename="%s"' % public_filename
    cherrypy.response.headerMap[ u"Content-Length" ] = os.path.getsize( local_filename )

    if self.__web_server == u"nginx":
      cherrypy.response.headerMap[ u"X-Accel-Redirect" ] = "/download_product/%s" % product[ u"filename" ]
      return ""

    def stream():
      CHUNK_SIZE = 8192
      local_file = file( local_filename, "rb" )

      # close the file whether the download completes, fails, or is abandoned part-way through
      try:
        local_file.seek( 0 )

        while True:
          data = local_file.read( CHUNK_SIZE )
          if len( data ) == 0: break
          yield data
      finally:
        local_file.close()

    return stream()
  @expose( view = File_preview_page )
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    quote_filename = Valid_bool( none_okay = True ),
    user_id = Valid_id( none_okay = True ),
  )
  def preview( self, file_id, quote_filename = False, user_id = None ):
    """
    Provide the values for a page that displays an uploaded image file along with a link to
    download it.

    @type file_id: unicode
    @param file_id: id of the file to view
    @type quote_filename: bool
    @param quote_filename: quote_filename value to include in download URL
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: {
      'file_id': file id,
      'filename': name of the file with any double quotes backslash-escaped,
      'quote_filename': the given quote_filename value,
    }
    @raise Access_error: the current user doesn't have access to the notebook that the file is in
    """
    db_file = self.__database.load( File, file_id )
    has_access = db_file and self.__users.load_notebook( user_id, db_file.notebook_id )

    if not has_access:
      raise Access_error()

    escaped_filename = db_file.filename.replace( '"', r"\"" )

    return {
      "file_id": file_id,
      "filename": escaped_filename,
      "quote_filename": quote_filename,
    }
  @expose()
  @weakly_expire
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    max_size = Valid_int( min = 10, max = 1000, none_okay = True ),
    user_id = Valid_id( none_okay = True )
  )
  def thumbnail( self, file_id, max_size = None, user_id = None ):
    """
    Produce a PNG thumbnail for a previously uploaded file. When the file can't be turned into a
    thumbnail (an IOError occurs while opening, scaling, or saving it), the default thumbnail image
    is returned instead.

    @type file_id: unicode
    @param file_id: id of the file to return a thumbnail for
    @type max_size: int or NoneType
    @param max_size: maximum thumbnail width or height in pixels (optional, defaults to a small size)
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: str
    @return: thumbnail image data in PNG format
    @raise Access_error: the current user doesn't have access to the notebook that the file is in
    """
    DEFAULT_MAX_THUMBNAIL_SIZE = 125

    db_file = self.__database.load( File, file_id )
    has_access = db_file and self.__users.load_notebook( user_id, db_file.notebook_id )

    if not has_access:
      raise Access_error()

    cherrypy.response.headerMap[ u"Content-Type" ] = u"image/png"

    max_size = max_size or DEFAULT_MAX_THUMBNAIL_SIZE

    def to_png( image ):
      # render the image into an in-memory buffer and hand back its bytes
      png_buffer = StringIO()
      image.save( png_buffer, "PNG" )
      png_buffer.seek( 0 )
      return png_buffer.getvalue()

    try:
      # attempt to open the file as an image and scale it down into a thumbnail
      image = Upload_file.open_image( file_id )
      image.thumbnail( ( max_size, max_size ), Image.ANTIALIAS )
      return to_png( image )
    except IOError:
      return to_png( Image.open( "static/images/default_thumbnail.png" ) )
  @expose()
  @weakly_expire
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    user_id = Valid_id( none_okay = True ),
  )
  def image( self, file_id, user_id = None ):
    """
    Return the contents of an image file that a user has previously uploaded. This is distinct
    from the download() method in that it doesn't set HTTP headers for a file download.

    @type file_id: unicode
    @param file_id: id of the file to return
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: generator or str
    @return: image data, or an empty string if the web server is nginx (which is handed the file
             via an X-Accel-Redirect header instead)
    @raise Access_error: the current user doesn't have access to the notebook that the file is in
    """
    db_file = self.__database.load( File, file_id )

    if not db_file or not self.__users.load_notebook( user_id, db_file.notebook_id ):
      raise Access_error()

    cherrypy.response.headerMap[ u"Content-Type" ] = db_file.content_type

    if self.__web_server == u"nginx":
      cherrypy.response.headerMap[ u"X-Accel-Redirect" ] = "/download/%s" % file_id
      return ""

    def stream():
      CHUNK_SIZE = 8192
      local_file = Upload_file.open_file( file_id )

      # close the file whether the transfer completes, fails, or is abandoned part-way through
      try:
        local_file.seek( 0 )

        while True:
          data = local_file.read( CHUNK_SIZE )
          if len( data ) == 0: break
          yield data
      finally:
        local_file.close()

    return stream()
  @expose( view = Json )
  @strongly_expire
  @end_transaction
  @grab_user_id
  @validate(
    notebook_id = Valid_id(),
    note_id = Valid_id( none_okay = True ),
    user_id = Valid_id( none_okay = True ),
  )
  def upload_id( self, notebook_id, note_id, user_id ):
    """
    Hand out a fresh file id that a subsequent upload can use.

    @type notebook_id: unicode
    @param notebook_id: id of the notebook that the upload will be to
    @type note_id: unicode
    @param note_id: id of the note that the upload will be to
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: { 'file_id': file_id }
    @raise Access_error: the current user doesn't have access to the given notebook
    """
    notebook = self.__users.load_notebook( user_id, notebook_id, read_write = True, note_id = note_id )

    if not notebook:
      raise Access_error()
    if notebook.read_write == Notebook.READ_WRITE_FOR_OWN_NOTES:
      raise Access_error()

    return { "file_id": self.__database.next_id( File ) }
  @expose( view = Blank_page )
  @strongly_expire
  @end_transaction
  @grab_user_id
  @validate(
    upload = (),
    notebook_id = Valid_id(),
    note_id = Valid_id( none_okay = True ),
    x_progress_id = Valid_id(),
    user_id = Valid_id( none_okay = True ),
  )
  def upload( self, upload, notebook_id, note_id, x_progress_id, user_id ):
    """
    Finish an upload from the client for attachment to a particular note. By the time this method
    is called, the file's data has been received into the Upload_file registered in
    current_uploads, so what remains is to check it and record its metadata. The x_progress_id
    must be provided as part of the query string, even if the other values are submitted as form
    data.

    @type upload: cgi.FieldStorage
    @param upload: file handle to uploaded file
    @type notebook_id: unicode
    @param notebook_id: id of the notebook that the upload is to
    @type note_id: unicode or NoneType
    @param note_id: id of the note that the upload is to (if any)
    @type x_progress_id: unicode
    @param x_progress_id: id of the file being uploaded
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: empty dict on success, or { 'script': error script } if the upload was rejected
    """
    file_id = x_progress_id

    def error_page( message ):
      return dict( script = general_error_script % message )

    # take the upload out of the map of in-progress uploads
    current_uploads_lock.acquire()
    try:
      uploaded_file = current_uploads.pop( file_id, None )
    finally:
      current_uploads_lock.release()

    if not uploaded_file:
      return error_page( u"Please select a file to upload." )

    def reject( page ):
      # throw away the received data before reporting the problem
      uploaded_file.delete()
      return page

    user = self.__database.load( User, user_id )
    notebook = self.__users.load_notebook( user_id, notebook_id, read_write = True )

    if not user or not notebook or notebook.read_write == Notebook.READ_WRITE_FOR_OWN_NOTES:
      return reject( error_page( u"Sorry, you don't have access to do that. Please make sure you're logged in as the correct user." ) )

    content_type = upload.headers.get( "content-type" )

    # if we didn't receive all of the expected data, abort
    if uploaded_file.total_received_bytes < uploaded_file.content_length:
      return reject( error_page( u"The uploaded file was not fully received. Please try again or contact support." ) )

    if uploaded_file.file_received_bytes == 0:
      return reject( error_page( u"The uploaded file was not received. Please make sure that the file exists." ) )

    # if the uploaded file's size would put the user over quota, bail and inform the user
    storage_quota_bytes = self.__users.rate_plan( user.rate_plan ).get( u"storage_quota_bytes" )
    if storage_quota_bytes and user.storage_bytes + uploaded_file.total_received_bytes > storage_quota_bytes:
      return reject( dict( script = quota_error_script ) )

    # record metadata on the upload in the database
    db_file = File.create(
      file_id, notebook_id, note_id, uploaded_file.filename, uploaded_file.file_received_bytes, content_type,
    )
    self.__database.save( db_file, commit = False )
    self.__users.update_storage( user_id, commit = False )
    self.__database.commit()

    uploaded_file.close()

    return {}
  @expose( view = Json )
  @strongly_expire
  @end_transaction
  @grab_user_id
  @validate(
    x_progress_id = Valid_id(),
    user_id = Valid_id( none_okay = True ),
  )
  def progress( self, x_progress_id, user_id = None ):
    """
    Report on a file that is in the process of being uploaded. Other than requiring a known user,
    this method does not perform any access checks, but the only information revealed is the
    file's upload progress.

    This method is intended to be polled while the file is uploading, and its returned data is
    intended to mimic the API described here:
    http://wiki.nginx.org//NginxHttpUploadProgressModule

    @type x_progress_id: unicode
    @param x_progress_id: id of a currently uploading file
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: one of the following:
      { 'state': 'starting' }                          // file is in the database without a filename
      { 'state': 'done' }                              // upload is complete
      { 'state': 'error', 'status': http_error_code }  // upload generated an HTTP error
      { 'state': 'uploading',                          // upload is in progress
        'received': bytes_received, 'size': total_bytes }
    """
    # fudge factor since content_length isn't really the file's actual size
    SOFT_QUOTA_FACTOR = 1.05

    def error( status ):
      return { "state": "error", "status": status }

    file_id = x_progress_id
    uploading_file = current_uploads.get( file_id )

    user = self.__database.load( User, user_id )
    if not user:
      return error( httplib.FORBIDDEN )

    if uploading_file:
      # if the uploaded file's size would put the user over quota, bail and inform the user
      storage_quota_bytes = self.__users.rate_plan( user.rate_plan ).get( u"storage_quota_bytes" )
      projected_bytes = user.storage_bytes + uploading_file.content_length

      if storage_quota_bytes and projected_bytes > storage_quota_bytes * SOFT_QUOTA_FACTOR:
        return error( httplib.REQUEST_ENTITY_TOO_LARGE )

      return {
        "state": u"uploading",
        "received": uploading_file.total_received_bytes,
        "size": uploading_file.content_length,
      }

    # the upload isn't in progress, so see whether it has made it into the database
    db_file = self.__database.load( File, file_id )
    if not db_file:
      return error( httplib.NOT_FOUND )

    if db_file.filename is None:
      return { "state": u"starting" }

    # the file is completely uploaded (in the database with a filename)
    return { "state": u"done" }
  @expose( view = Json )
  @strongly_expire
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    user_id = Valid_id( none_okay = True ),
  )
  def stats( self, file_id, user_id = None ):
    """
    Report on a file that has been completely uploaded with its metadata stored in the database,
    along with the user's current storage utilization in bytes.

    @type file_id: unicode
    @param file_id: id of the file to report on
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: {
      'filename': filename,
      'size_bytes': filesize,
      'storage_bytes': current storage usage by user
    }
    @raise Access_error: the file or user is unknown, or the current user doesn't have access to
                         the notebook that the file is in
    """
    db_file = self.__database.load( File, file_id )
    if db_file is None:
      raise Access_error()

    db_notebook = self.__users.load_notebook( user_id, db_file.notebook_id )
    if db_notebook is None:
      raise Access_error()
    if db_notebook.read_write == Notebook.READ_WRITE_FOR_OWN_NOTES:
      raise Access_error()

    user = self.__database.load( User, user_id )
    if not user:
      raise Access_error()

    # refresh the group storage figure on the loaded user object
    user.group_storage_bytes = self.__users.calculate_group_storage( user )

    return {
      "filename": db_file.filename,
      "size_bytes": db_file.size_bytes,
      "storage_bytes": user.storage_bytes,
    }
  @expose( view = Json )
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    user_id = Valid_id( none_okay = True ),
  )
  def delete( self, file_id, user_id = None ):
    """
    Remove a completely uploaded file: first its metadata from the database, and then, once that
    is committed, its data from the filesystem. Return the user's current storage utilization in
    bytes.

    @type file_id: unicode
    @param file_id: id of the file to delete
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: {
      'storage_bytes': current storage usage by user
    }
    @raise Access_error: the file is unknown, or the current user doesn't have access to the
                         notebook that the file is in
    """
    db_file = self.__database.load( File, file_id )
    if db_file is None:
      raise Access_error()

    db_notebook = self.__users.load_notebook( user_id, db_file.notebook_id, read_write = True )
    if db_notebook is None:
      raise Access_error()
    if db_notebook.read_write == Notebook.READ_WRITE_FOR_OWN_NOTES:
      raise Access_error()

    # delete the metadata and update the user's storage usage within a single commit
    self.__database.execute( db_file.sql_delete(), commit = False )
    user = self.__users.update_storage( user_id, commit = False )
    self.__database.uncache( db_file )
    self.__database.commit()

    user.group_storage_bytes = self.__users.calculate_group_storage( user )

    Upload_file.delete_file( file_id )

    return { "storage_bytes": user.storage_bytes }
  @expose( view = Json )
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    filename = unicode,
    user_id = Valid_id( none_okay = True ),
  )
  def rename( self, file_id, filename, user_id = None ):
    """
    Give a completely uploaded file a new name by updating its metadata in the database.

    @type file_id: unicode
    @param file_id: id of the file to rename
    @type filename: unicode
    @param filename: new name for the file
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: {}
    @raise Access_error: the file is unknown, or the current user doesn't have access to the
                         notebook that the file is in
    """
    db_file = self.__database.load( File, file_id )
    if db_file is None:
      raise Access_error()

    db_notebook = self.__users.load_notebook( user_id, db_file.notebook_id, read_write = True )
    if db_notebook is None:
      raise Access_error()
    if db_notebook.read_write == Notebook.READ_WRITE_FOR_OWN_NOTES:
      raise Access_error()

    db_file.filename = filename
    self.__database.save( db_file )

    return {}
  def parse_csv( self, file_id, skip_header = False ):
    """
    Attempt to parse a previously uploaded file as a table or spreadsheet. Generate rows as they're
    requested. Because this is a generator, no parsing takes place (and no Parse_error is raised)
    until the first row is requested.

    @type file_id: unicode
    @param file_id: id of the file to parse
    @type skip_header: bool
    @param skip_header: if a line of header labels is detected, don't include it in the generated
                        rows (defaults to False)
    @rtype: generator
    @return: rows of data from the parsed file. each row is a list of unicode elements
    @raise Parse_error: there was an error in parsing the given file: it couldn't be read, isn't
                        valid CSV, has rows of differing lengths, or couldn't be decoded
    """
    import csv

    APPROX_SNIFF_SAMPLE_SIZE_BYTES = 1024 * 50
    table_file = None

    # the outer try makes sure the file is closed whether parsing completes, fails, or the caller
    # stops requesting rows part-way through
    try:
      try:
        table_file = Upload_file.open_file( file_id )
        table_file.seek( 0 ) # necessary in case the file is opened by another call to parse_csv()
        sniffer = csv.Sniffer()

        # attempt to determine the presence of a header
        lines = table_file.readlines( APPROX_SNIFF_SAMPLE_SIZE_BYTES )
        sniff_sample = "".join( lines )
        has_header = sniffer.has_header( sniff_sample )

        # attempt to determine the file's character encoding
        detector = UniversalDetector()
        for line in lines:
          detector.feed( line )
          if detector.done: break
        detector.close()
        encoding = detector.result.get( "encoding" )

        table_file.seek( 0 )
        reader = csv.reader( table_file )

        # skip the header if requested to do so
        if has_header and skip_header:
          reader.next()

        expected_row_length = None

        for row in reader:
          # all rows must have the same number of elements
          current_row_length = len( row )
          if current_row_length == 0:
            continue
          if expected_row_length and current_row_length != expected_row_length:
            raise Parse_error()
          else:
            expected_row_length = current_row_length

          # if no encoding was detected, then decode() raises a TypeError, which is handled below.
          # data that doesn't match the detected encoding, or a detected encoding that Python
          # doesn't know about, is a parse failure as well
          try:
            decoded_row = [ element.decode( encoding ) for element in row ]
          except ( UnicodeError, LookupError ):
            raise Parse_error()

          yield decoded_row
      except ( csv.Error, IOError, TypeError ):
        raise Parse_error()
    finally:
      if table_file is not None:
        table_file.close()
  @expose( view = Json )
  @end_transaction
  @grab_user_id
  @validate(
    file_id = Valid_id(),
    user_id = Valid_id( none_okay = True ),
  )
  def csv_head( self, file_id, user_id = None ):
    """
    Parse a previously uploaded file as a table or spreadsheet and return just its first few rows,
    limiting both the number of elements per row and the length of each element.

    Currently, only a CSV file format is supported.

    @type file_id: unicode
    @param file_id: id of the file to parse
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: dict
    @return: {
      'file_id': file id,
      'rows': list of parsed rows, each of which is a list of elements,
    }
    @raise Access_error: the file is unknown, or the current user doesn't have access to the
                         notebook that the file is in
    @raise Parse_error: there was an error in parsing the given file, or it contains no rows
    """
    MAX_ROW_COUNT = 4
    MAX_ELEMENT_LENGTH = 30
    MAX_ROW_ELEMENT_COUNT = 20

    db_file = self.__database.load( File, file_id )
    if db_file is None:
      raise Access_error()

    db_notebook = self.__users.load_notebook( user_id, db_file.notebook_id )
    if db_notebook is None:
      raise Access_error()
    if db_notebook.read_write == Notebook.READ_WRITE_FOR_OWN_NOTES:
      raise Access_error()

    def shorten( element ):
      if len( element ) <= MAX_ELEMENT_LENGTH:
        return element

      return "%s ..." % element[ : MAX_ELEMENT_LENGTH ]

    rows = []

    for row in self.parse_csv( file_id ):
      if not row:
        continue

      rows.append( [ shorten( element ) for element in row[ : MAX_ROW_ELEMENT_COUNT ] ] )

      # only the head of the table is wanted, so stop parsing as soon as there are enough rows
      if len( rows ) == MAX_ROW_COUNT:
        break

    if not rows:
      raise Parse_error()

    return {
      "file_id": file_id,
      "rows": rows,
    }
  def purge_unused( self, note, purge_all_links = False ):
    """
    Delete files that were linked from the given note but no longer are. Each such file has its
    metadata deleted from the database and, once that deletion is committed, its data removed from
    the filesystem.

    @type note: model.Note
    @param note: note to search for file links
    @type purge_all_links: bool
    @param purge_all_links: if True, delete all files that are/were linked from this note. any
                            false value (not just False itself) means that files still linked from
                            the note's contents are kept
    """
    # load metadata for all files with the given note's note_id
    files = self.__database.select_many( File, File.sql_load_note_files( note.object_id ) )
    files_to_delete = dict( [ ( db_file.object_id, db_file ) for db_file in files ] )

    # search through the note's contents for current links to files
    if not purge_all_links:
      for match in self.FILE_LINK_PATTERN.finditer( note.contents ):
        # we've found a link for this file_id, so don't delete that file
        files_to_delete.pop( match.group( 1 ), None )

    # for each file to delete, delete its metadata from the database
    for db_file in files_to_delete.values():
      self.__database.execute( db_file.sql_delete(), commit = False )
      self.__database.uncache( db_file )

    self.__database.commit()

    # only remove the data from the filesystem once the metadata deletion has been committed, so
    # that a failed commit doesn't leave metadata pointing at data that's already gone
    for file_id in files_to_delete.keys():
      Upload_file.delete_file( file_id )