|
@@ -0,0 +1,420 @@
|
|
|
|
+#!/usr/local/opt/[email protected]/bin/python3.8
|
|
|
|
+# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
|
|
|
|
+# This script is part of the xlrd package, which is released under a
|
|
|
|
+# BSD-style licence.
|
|
|
|
+
|
|
|
|
+from __future__ import print_function
|
|
|
|
+
|
|
|
|
+cmd_doc = """
|
|
|
|
+Commands:
|
|
|
|
+
|
|
|
|
+2rows Print the contents of first and last row in each sheet
|
|
|
|
+3rows Print the contents of first, second and last row in each sheet
|
|
|
|
+bench Same as "show", but doesn't print -- for profiling
|
|
|
|
+biff_count[1] Print a count of each type of BIFF record in the file
|
|
|
|
+biff_dump[1] Print a dump (char and hex) of the BIFF records in the file
|
|
|
|
+fonts hdr + print a dump of all font objects
|
|
|
|
+hdr Mini-overview of file (no per-sheet information)
|
|
|
|
+hotshot Do a hotshot profile run e.g. ... -f1 hotshot bench bigfile*.xls
|
|
|
|
+labels Dump of sheet.col_label_ranges and ...row... for each sheet
|
|
|
|
+name_dump Dump of each object in book.name_obj_list
|
|
|
|
+names Print brief information for each NAME record
|
|
|
|
+ov Overview of file
|
|
|
|
+profile Like "hotshot", but uses cProfile
|
|
|
|
+show Print the contents of all rows in each sheet
|
|
|
|
+version[0] Print versions of xlrd and Python and exit
|
|
|
|
+xfc Print "XF counts" and cell-type counts -- see code for details
|
|
|
|
+
|
|
|
|
+[0] means no file arg
|
|
|
|
+[1] means only one file arg i.e. no glob.glob pattern
|
|
|
|
+"""
|
|
|
|
+
|
|
|
|
+options = None
|
|
|
|
+if __name__ == "__main__":
|
|
|
|
+
|
|
|
|
+ PSYCO = 0
|
|
|
|
+
|
|
|
|
+ import xlrd
|
|
|
|
+ import sys
|
|
|
|
+ import time
|
|
|
|
+ import glob
|
|
|
|
+ import traceback
|
|
|
|
+ import gc
|
|
|
|
+
|
|
|
|
+ from xlrd.timemachine import xrange, REPR
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ class LogHandler(object):
|
|
|
|
+
|
|
|
|
+ def __init__(self, logfileobj):
|
|
|
|
+ self.logfileobj = logfileobj
|
|
|
|
+ self.fileheading = None
|
|
|
|
+ self.shown = 0
|
|
|
|
+
|
|
|
|
+ def setfileheading(self, fileheading):
|
|
|
|
+ self.fileheading = fileheading
|
|
|
|
+ self.shown = 0
|
|
|
|
+
|
|
|
|
+ def write(self, text):
|
|
|
|
+ if self.fileheading and not self.shown:
|
|
|
|
+ self.logfileobj.write(self.fileheading)
|
|
|
|
+ self.shown = 1
|
|
|
|
+ self.logfileobj.write(text)
|
|
|
|
+
|
|
|
|
+ null_cell = xlrd.empty_cell
|
|
|
|
+
|
|
|
|
+ def show_row(bk, sh, rowx, colrange, printit):
|
|
|
|
+ if bk.ragged_rows:
|
|
|
|
+ colrange = range(sh.row_len(rowx))
|
|
|
|
+ if not colrange: return
|
|
|
|
+ if printit: print()
|
|
|
|
+ if bk.formatting_info:
|
|
|
|
+ for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange):
|
|
|
|
+ if printit:
|
|
|
|
+ print("cell %s%d: type=%d, data: %r, xfx: %s"
|
|
|
|
+ % (xlrd.colname(colx), rowx+1, ty, val, cxfx))
|
|
|
|
+ else:
|
|
|
|
+ for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange):
|
|
|
|
+ if printit:
|
|
|
|
+ print("cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val))
|
|
|
|
+
|
|
|
|
+ def get_row_data(bk, sh, rowx, colrange):
|
|
|
|
+ result = []
|
|
|
|
+ dmode = bk.datemode
|
|
|
|
+ ctys = sh.row_types(rowx)
|
|
|
|
+ cvals = sh.row_values(rowx)
|
|
|
|
+ for colx in colrange:
|
|
|
|
+ cty = ctys[colx]
|
|
|
|
+ cval = cvals[colx]
|
|
|
|
+ if bk.formatting_info:
|
|
|
|
+ cxfx = str(sh.cell_xf_index(rowx, colx))
|
|
|
|
+ else:
|
|
|
|
+ cxfx = ''
|
|
|
|
+ if cty == xlrd.XL_CELL_DATE:
|
|
|
|
+ try:
|
|
|
|
+ showval = xlrd.xldate_as_tuple(cval, dmode)
|
|
|
|
+ except xlrd.XLDateError as e:
|
|
|
|
+ showval = "%s:%s" % (type(e).__name__, e)
|
|
|
|
+ cty = xlrd.XL_CELL_ERROR
|
|
|
|
+ elif cty == xlrd.XL_CELL_ERROR:
|
|
|
|
+ showval = xlrd.error_text_from_code.get(cval, '<Unknown error code 0x%02x>' % cval)
|
|
|
|
+ else:
|
|
|
|
+ showval = cval
|
|
|
|
+ result.append((colx, cty, showval, cxfx))
|
|
|
|
+ return result
|
|
|
|
+
|
|
|
|
+ def bk_header(bk):
|
|
|
|
+ print()
|
|
|
|
+ print("BIFF version: %s; datemode: %s"
|
|
|
|
+ % (xlrd.biff_text_from_num[bk.biff_version], bk.datemode))
|
|
|
|
+ print("codepage: %r (encoding: %s); countries: %r"
|
|
|
|
+ % (bk.codepage, bk.encoding, bk.countries))
|
|
|
|
+ print("Last saved by: %r" % bk.user_name)
|
|
|
|
+ print("Number of data sheets: %d" % bk.nsheets)
|
|
|
|
+ print("Use mmap: %d; Formatting: %d; On demand: %d"
|
|
|
|
+ % (bk.use_mmap, bk.formatting_info, bk.on_demand))
|
|
|
|
+ print("Ragged rows: %d" % bk.ragged_rows)
|
|
|
|
+ if bk.formatting_info:
|
|
|
|
+ print("FORMATs: %d, FONTs: %d, XFs: %d"
|
|
|
|
+ % (len(bk.format_list), len(bk.font_list), len(bk.xf_list)))
|
|
|
|
+ if not options.suppress_timing:
|
|
|
|
+ print("Load time: %.2f seconds (stage 1) %.2f seconds (stage 2)"
|
|
|
|
+ % (bk.load_time_stage_1, bk.load_time_stage_2))
|
|
|
|
+ print()
|
|
|
|
+
|
|
|
|
+ def show_fonts(bk):
|
|
|
|
+ print("Fonts:")
|
|
|
|
+ for x in xrange(len(bk.font_list)):
|
|
|
|
+ font = bk.font_list[x]
|
|
|
|
+ font.dump(header='== Index %d ==' % x, indent=4)
|
|
|
|
+
|
|
|
|
+ def show_names(bk, dump=0):
|
|
|
|
+ bk_header(bk)
|
|
|
|
+ if bk.biff_version < 50:
|
|
|
|
+ print("Names not extracted in this BIFF version")
|
|
|
|
+ return
|
|
|
|
+ nlist = bk.name_obj_list
|
|
|
|
+ print("Name list: %d entries" % len(nlist))
|
|
|
|
+ for nobj in nlist:
|
|
|
|
+ if dump:
|
|
|
|
+ nobj.dump(sys.stdout,
|
|
|
|
+ header="\n=== Dump of name_obj_list[%d] ===" % nobj.name_index)
|
|
|
|
+ else:
|
|
|
|
+ print("[%d]\tName:%r macro:%r scope:%d\n\tresult:%r\n"
|
|
|
|
+ % (nobj.name_index, nobj.name, nobj.macro, nobj.scope, nobj.result))
|
|
|
|
+
|
|
|
|
+ def print_labels(sh, labs, title):
|
|
|
|
+ if not labs:return
|
|
|
|
+ for rlo, rhi, clo, chi in labs:
|
|
|
|
+ print("%s label range %s:%s contains:"
|
|
|
|
+ % (title, xlrd.cellname(rlo, clo), xlrd.cellname(rhi-1, chi-1)))
|
|
|
|
+ for rx in xrange(rlo, rhi):
|
|
|
|
+ for cx in xrange(clo, chi):
|
|
|
|
+ print(" %s: %r" % (xlrd.cellname(rx, cx), sh.cell_value(rx, cx)))
|
|
|
|
+
|
|
|
|
+ def show_labels(bk):
|
|
|
|
+ # bk_header(bk)
|
|
|
|
+ hdr = 0
|
|
|
|
+ for shx in range(bk.nsheets):
|
|
|
|
+ sh = bk.sheet_by_index(shx)
|
|
|
|
+ clabs = sh.col_label_ranges
|
|
|
|
+ rlabs = sh.row_label_ranges
|
|
|
|
+ if clabs or rlabs:
|
|
|
|
+ if not hdr:
|
|
|
|
+ bk_header(bk)
|
|
|
|
+ hdr = 1
|
|
|
|
+ print("sheet %d: name = %r; nrows = %d; ncols = %d" %
|
|
|
|
+ (shx, sh.name, sh.nrows, sh.ncols))
|
|
|
|
+ print_labels(sh, clabs, 'Col')
|
|
|
|
+ print_labels(sh, rlabs, 'Row')
|
|
|
|
+ if bk.on_demand: bk.unload_sheet(shx)
|
|
|
|
+
|
|
|
|
+ def show(bk, nshow=65535, printit=1):
|
|
|
|
+ bk_header(bk)
|
|
|
|
+ if 0:
|
|
|
|
+ rclist = xlrd.sheet.rc_stats.items()
|
|
|
|
+ rclist = sorted(rclist)
|
|
|
|
+ print("rc stats")
|
|
|
|
+ for k, v in rclist:
|
|
|
|
+ print("0x%04x %7d" % (k, v))
|
|
|
|
+ if options.onesheet:
|
|
|
|
+ try:
|
|
|
|
+ shx = int(options.onesheet)
|
|
|
|
+ except ValueError:
|
|
|
|
+ shx = bk.sheet_by_name(options.onesheet).number
|
|
|
|
+ shxrange = [shx]
|
|
|
|
+ else:
|
|
|
|
+ shxrange = range(bk.nsheets)
|
|
|
|
+ # print("shxrange", list(shxrange))
|
|
|
|
+ for shx in shxrange:
|
|
|
|
+ sh = bk.sheet_by_index(shx)
|
|
|
|
+ nrows, ncols = sh.nrows, sh.ncols
|
|
|
|
+ colrange = range(ncols)
|
|
|
|
+ anshow = min(nshow, nrows)
|
|
|
|
+ print("sheet %d: name = %s; nrows = %d; ncols = %d" %
|
|
|
|
+ (shx, REPR(sh.name), sh.nrows, sh.ncols))
|
|
|
|
+ if nrows and ncols:
|
|
|
|
+ # Beat the bounds
|
|
|
|
+ for rowx in xrange(nrows):
|
|
|
|
+ nc = sh.row_len(rowx)
|
|
|
|
+ if nc:
|
|
|
|
+ sh.row_types(rowx)[nc-1]
|
|
|
|
+ sh.row_values(rowx)[nc-1]
|
|
|
|
+ sh.cell(rowx, nc-1)
|
|
|
|
+ for rowx in xrange(anshow-1):
|
|
|
|
+ if not printit and rowx % 10000 == 1 and rowx > 1:
|
|
|
|
+ print("done %d rows" % (rowx-1,))
|
|
|
|
+ show_row(bk, sh, rowx, colrange, printit)
|
|
|
|
+ if anshow and nrows:
|
|
|
|
+ show_row(bk, sh, nrows-1, colrange, printit)
|
|
|
|
+ print()
|
|
|
|
+ if bk.on_demand: bk.unload_sheet(shx)
|
|
|
|
+
|
|
|
|
+ def count_xfs(bk):
|
|
|
|
+ bk_header(bk)
|
|
|
|
+ for shx in range(bk.nsheets):
|
|
|
|
+ sh = bk.sheet_by_index(shx)
|
|
|
|
+ nrows = sh.nrows
|
|
|
|
+ print("sheet %d: name = %r; nrows = %d; ncols = %d" %
|
|
|
|
+ (shx, sh.name, sh.nrows, sh.ncols))
|
|
|
|
+ # Access all xfindexes to force gathering stats
|
|
|
|
+ type_stats = [0, 0, 0, 0, 0, 0, 0]
|
|
|
|
+ for rowx in xrange(nrows):
|
|
|
|
+ for colx in xrange(sh.row_len(rowx)):
|
|
|
|
+ xfx = sh.cell_xf_index(rowx, colx)
|
|
|
|
+ assert xfx >= 0
|
|
|
|
+ cty = sh.cell_type(rowx, colx)
|
|
|
|
+ type_stats[cty] += 1
|
|
|
|
+ print("XF stats", sh._xf_index_stats)
|
|
|
|
+ print("type stats", type_stats)
|
|
|
|
+ print()
|
|
|
|
+ if bk.on_demand: bk.unload_sheet(shx)
|
|
|
|
+
|
|
|
|
+ def main(cmd_args):
|
|
|
|
+ import optparse
|
|
|
|
+ global options, PSYCO
|
|
|
|
+ usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc
|
|
|
|
+ oparser = optparse.OptionParser(usage)
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-l", "--logfilename",
|
|
|
|
+ default="",
|
|
|
|
+ help="contains error messages")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-v", "--verbosity",
|
|
|
|
+ type="int", default=0,
|
|
|
|
+ help="level of information and diagnostics provided")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-m", "--mmap",
|
|
|
|
+ type="int", default=-1,
|
|
|
|
+ help="1: use mmap; 0: don't use mmap; -1: accept heuristic")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-e", "--encoding",
|
|
|
|
+ default="",
|
|
|
|
+ help="encoding override")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-f", "--formatting",
|
|
|
|
+ type="int", default=0,
|
|
|
|
+ help="0 (default): no fmt info\n"
|
|
|
|
+ "1: fmt info (all cells)\n",
|
|
|
|
+ )
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-g", "--gc",
|
|
|
|
+ type="int", default=0,
|
|
|
|
+ help="0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-s", "--onesheet",
|
|
|
|
+ default="",
|
|
|
|
+ help="restrict output to this sheet (name or index)")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-u", "--unnumbered",
|
|
|
|
+ action="store_true", default=0,
|
|
|
|
+ help="omit line numbers or offsets in biff_dump")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-d", "--on-demand",
|
|
|
|
+ action="store_true", default=0,
|
|
|
|
+ help="load sheets on demand instead of all at once")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-t", "--suppress-timing",
|
|
|
|
+ action="store_true", default=0,
|
|
|
|
+ help="don't print timings (diffs are less messy)")
|
|
|
|
+ oparser.add_option(
|
|
|
|
+ "-r", "--ragged-rows",
|
|
|
|
+ action="store_true", default=0,
|
|
|
|
+ help="open_workbook(..., ragged_rows=True)")
|
|
|
|
+ options, args = oparser.parse_args(cmd_args)
|
|
|
|
+ if len(args) == 1 and args[0] in ("version", ):
|
|
|
|
+ pass
|
|
|
|
+ elif len(args) < 2:
|
|
|
|
+ oparser.error("Expected at least 2 args, found %d" % len(args))
|
|
|
|
+ cmd = args[0]
|
|
|
|
+ xlrd_version = getattr(xlrd, "__VERSION__", "unknown; before 0.5")
|
|
|
|
+ if cmd == 'biff_dump':
|
|
|
|
+ xlrd.dump(args[1], unnumbered=options.unnumbered)
|
|
|
|
+ sys.exit(0)
|
|
|
|
+ if cmd == 'biff_count':
|
|
|
|
+ xlrd.count_records(args[1])
|
|
|
|
+ sys.exit(0)
|
|
|
|
+ if cmd == 'version':
|
|
|
|
+ print("xlrd: %s, from %s" % (xlrd_version, xlrd.__file__))
|
|
|
|
+ print("Python:", sys.version)
|
|
|
|
+ sys.exit(0)
|
|
|
|
+ if options.logfilename:
|
|
|
|
+ logfile = LogHandler(open(options.logfilename, 'w'))
|
|
|
|
+ else:
|
|
|
|
+ logfile = sys.stdout
|
|
|
|
+ mmap_opt = options.mmap
|
|
|
|
+ mmap_arg = xlrd.USE_MMAP
|
|
|
|
+ if mmap_opt in (1, 0):
|
|
|
|
+ mmap_arg = mmap_opt
|
|
|
|
+ elif mmap_opt != -1:
|
|
|
|
+ print('Unexpected value (%r) for mmap option -- assuming default' % mmap_opt)
|
|
|
|
+ fmt_opt = options.formatting | (cmd in ('xfc', ))
|
|
|
|
+ gc_mode = options.gc
|
|
|
|
+ if gc_mode:
|
|
|
|
+ gc.disable()
|
|
|
|
+ for pattern in args[1:]:
|
|
|
|
+ for fname in glob.glob(pattern):
|
|
|
|
+ print("\n=== File: %s ===" % fname)
|
|
|
|
+ if logfile != sys.stdout:
|
|
|
|
+ logfile.setfileheading("\n=== File: %s ===\n" % fname)
|
|
|
|
+ if gc_mode == 1:
|
|
|
|
+ n_unreachable = gc.collect()
|
|
|
|
+ if n_unreachable:
|
|
|
|
+ print("GC before open:", n_unreachable, "unreachable objects")
|
|
|
|
+ if PSYCO:
|
|
|
|
+ import psyco
|
|
|
|
+ psyco.full()
|
|
|
|
+ PSYCO = 0
|
|
|
|
+ try:
|
|
|
|
+ t0 = time.time()
|
|
|
|
+ bk = xlrd.open_workbook(
|
|
|
|
+ fname,
|
|
|
|
+ verbosity=options.verbosity, logfile=logfile,
|
|
|
|
+ use_mmap=mmap_arg,
|
|
|
|
+ encoding_override=options.encoding,
|
|
|
|
+ formatting_info=fmt_opt,
|
|
|
|
+ on_demand=options.on_demand,
|
|
|
|
+ ragged_rows=options.ragged_rows,
|
|
|
|
+ )
|
|
|
|
+ t1 = time.time()
|
|
|
|
+ if not options.suppress_timing:
|
|
|
|
+ print("Open took %.2f seconds" % (t1-t0,))
|
|
|
|
+ except xlrd.XLRDError as e:
|
|
|
|
+ print("*** Open failed: %s: %s" % (type(e).__name__, e))
|
|
|
|
+ continue
|
|
|
|
+ except KeyboardInterrupt:
|
|
|
|
+ print("*** KeyboardInterrupt ***")
|
|
|
|
+ traceback.print_exc(file=sys.stdout)
|
|
|
|
+ sys.exit(1)
|
|
|
|
+ except BaseException as e:
|
|
|
|
+ print("*** Open failed: %s: %s" % (type(e).__name__, e))
|
|
|
|
+ traceback.print_exc(file=sys.stdout)
|
|
|
|
+ continue
|
|
|
|
+ t0 = time.time()
|
|
|
|
+ if cmd == 'hdr':
|
|
|
|
+ bk_header(bk)
|
|
|
|
+ elif cmd == 'ov': # OverView
|
|
|
|
+ show(bk, 0)
|
|
|
|
+ elif cmd == 'show': # all rows
|
|
|
|
+ show(bk)
|
|
|
|
+ elif cmd == '2rows': # first row and last row
|
|
|
|
+ show(bk, 2)
|
|
|
|
+ elif cmd == '3rows': # first row, 2nd row and last row
|
|
|
|
+ show(bk, 3)
|
|
|
|
+ elif cmd == 'bench':
|
|
|
|
+ show(bk, printit=0)
|
|
|
|
+ elif cmd == 'fonts':
|
|
|
|
+ bk_header(bk)
|
|
|
|
+ show_fonts(bk)
|
|
|
|
+ elif cmd == 'names': # named reference list
|
|
|
|
+ show_names(bk)
|
|
|
|
+ elif cmd == 'name_dump': # named reference list
|
|
|
|
+ show_names(bk, dump=1)
|
|
|
|
+ elif cmd == 'labels':
|
|
|
|
+ show_labels(bk)
|
|
|
|
+ elif cmd == 'xfc':
|
|
|
|
+ count_xfs(bk)
|
|
|
|
+ else:
|
|
|
|
+ print("*** Unknown command <%s>" % cmd)
|
|
|
|
+ sys.exit(1)
|
|
|
|
+ del bk
|
|
|
|
+ if gc_mode == 1:
|
|
|
|
+ n_unreachable = gc.collect()
|
|
|
|
+ if n_unreachable:
|
|
|
|
+ print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects")
|
|
|
|
+ if not options.suppress_timing:
|
|
|
|
+ t1 = time.time()
|
|
|
|
+ print("\ncommand took %.2f seconds\n" % (t1-t0,))
|
|
|
|
+
|
|
|
|
+ return None
|
|
|
|
+
|
|
|
|
+ av = sys.argv[1:]
|
|
|
|
+ if not av:
|
|
|
|
+ main(av)
|
|
|
|
+ firstarg = av[0].lower()
|
|
|
|
+ if firstarg == "hotshot":
|
|
|
|
+ import hotshot
|
|
|
|
+ import hotshot.stats
|
|
|
|
+ av = av[1:]
|
|
|
|
+ prof_log_name = "XXXX.prof"
|
|
|
|
+ prof = hotshot.Profile(prof_log_name)
|
|
|
|
+ # benchtime, result = prof.runcall(main, *av)
|
|
|
|
+ result = prof.runcall(main, *(av, ))
|
|
|
|
+ print("result", repr(result))
|
|
|
|
+ prof.close()
|
|
|
|
+ stats = hotshot.stats.load(prof_log_name)
|
|
|
|
+ stats.strip_dirs()
|
|
|
|
+ stats.sort_stats('time', 'calls')
|
|
|
|
+ stats.print_stats(20)
|
|
|
|
+ elif firstarg == "profile":
|
|
|
|
+ import cProfile
|
|
|
|
+ av = av[1:]
|
|
|
|
+ cProfile.run('main(av)', 'YYYY.prof')
|
|
|
|
+ import pstats
|
|
|
|
+ p = pstats.Stats('YYYY.prof')
|
|
|
|
+ p.strip_dirs().sort_stats('cumulative').print_stats(30)
|
|
|
|
+ elif firstarg == "psyco":
|
|
|
|
+ PSYCO = 1
|
|
|
|
+ main(av[1:])
|
|
|
|
+ else:
|
|
|
|
+ main(av)
|