handle various data sources
BSZ, Naxos, Nutzergesteuerte Erwerbung (patron-driven acquisition: Print, Ebooks), ...
export FINCMARC as the deliverable for the VuFind frontend
$filelist = glob("$tdir/$workfix*"); echo "processing " . count($filelist) . " chunks\n"; foreach($filelist as $file) { foreach($source->children() as $action) { ...
for j, record in enumerate(record_iterator, start=1):
    ...
    for command in _commands:
        if 'skip_this' not in bag:
            bag = command.execute(bag)
    ...
QUEUE_SIZE = 20000
tasks = multiprocessing.JoinableQueue(QUEUE_SIZE)
num_workers = args.workers or multiprocessing.cpu_count() * 40
workers = [ Worker(tasks, appconfig, iconf) for i in range(num_workers) ]
_ = [ w.start() for w in workers ]
...
with open(filename, 'r') as handle:
    reader = pymarc.MARCReader(handle.read(), to_unicode=True)
    for i, record in enumerate(reader):
        tasks.put(record)
isbns = set()
for subfield in doc['content']['020']:
    if 'a' in subfield:
        # 020.a may look like: '9780948838903 (pbk.) :'
        isbns.add(subfield['a'].split()[0])
for isbn in isbns:
    # dedup here
$ curl -XGET 0.0.0.0:9200/_search?q=content.020.a:9781430272304
$ curl -XPOST "http://0.0.0.0:9200/nep/_search?pretty=true" -d '{ "query" : { "match_all" : {} }, "facets" : { "tags" : { "terms" : { "field" : "content.072.2" }}}}'
$ ./scroll.sh --index bsz --meta.kind=title | ./scripts/bsz.py
but ...
/