How to Read the Contents of Txt Files in Different Directories and Rename Other Files Accordingly
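I just started with Python 3 and ran into the following problem: I downloaded a good deal of PDFs from different journals for my thesis, but they are all named after their DOI and not after their title, so I want to rename each PDF using the citation found in its accompanying .txt file.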
Solution 1:
Finally got it to work:
#__author__ = 'Telefonmann'
# -*- coding: utf-8 -*-
import os, re, ntpath, shutil
# Captures the text between "To cite this article:" and
# ", Journal of Quantitative Linguistics,". The \s* between the words makes
# the match tolerant of missing or extra whitespace (e.g. the typo
# "Journal ofQuantitative ..."). Compiled once instead of once per file.
CITATION_RE = re.compile(
    r'To\s*cite\s*this\s*article:\s*(.*?),\s*Journal\s*of\s*Quantitative\s*Linguistics\s*,'
)

# Characters that are forbidden in Windows file names. ':' becomes ' -' so
# that "Author (2010): Title" stays readable; the others are dropped.
FORBIDDEN_CHARS = str.maketrans({
    ':': ' -',
    '<': None,
    '>': None,
    '"': None,
    '/': None,
    '\\': None,
    '|': None,
    '?': None,
    '*': None,
})


def extract_title(content):
    """Return the citation text found in *content*, or None if there is none.

    *content* is the text of one .txt file with its newlines already removed,
    so a citation that was wrapped over several lines still matches.
    """
    match = CITATION_RE.search(content)
    return match.group(1) if match else None


def sanitize_title(title):
    """Return *title* with characters forbidden in Windows file names removed."""
    return title.translate(FORBIDDEN_CHARS)


def copy_pdfs_with_titles(top):
    """Copy every PDF below *top* to a file named after its citation.

    For each ``*.txt`` file below *top* the citation is extracted from the
    file's text. If the PDF with the same base name exists in the same
    directory, it is copied to ``<citation>.pdf`` in that directory. The
    original PDF is left in place.

    Returns the list of paths of the copies that were created.
    """
    copied = []
    for root, _dirs, files in os.walk(top):
        for file in files:
            if not file.endswith('.txt'):  # only processes txt files
                continue
            # os.path is ntpath on Windows, so this builds the same kind of
            # path there and a valid one on every other platform.
            full_path = os.path.join(root, file)
            try:
                with open(full_path, 'r', encoding='utf-8') as f:
                    content = f.read().replace('\n', '')  # remove newlines
            except (OSError, UnicodeDecodeError) as err:
                # One unreadable file must not abort the whole run.
                print('Skipping {0}: {1}'.format(full_path, err))
                continue

            full_title = extract_title(content)
            if full_title is None:
                continue
            print("full_title: {0}".format(full_title))
            full_title = sanitize_title(full_title)

            # The txt and pdf files differ only in their extension. Swap the
            # extension alone: replacing every 'txt' in the path would also
            # rewrite directory or file names that happen to contain 'txt'.
            pdf_name = os.path.splitext(full_path)[0] + '.pdf'

            # for trouble shooting
            print('File: ' + file)
            print('Full Path: ' + full_path)
            print('Full Title: ' + full_title)
            print('PDF Name: ' + pdf_name)
            print('....................................')

            if not full_title:
                # Nothing left after sanitising; would create a bare ".pdf".
                print('Empty title, skipping: ' + full_path)
                continue

            new_path = os.path.join(root, "{0}.pdf".format(full_title))
            # Check the PDF that is about to be copied, not the txt file that
            # has already been read successfully.
            if os.path.isfile(pdf_name):
                print("all paths found")
                # makes a copy of the pdf file with the new name in the
                # respective directory
                shutil.copy(pdf_name, new_path)
                copied.append(new_path)
            else:
                print('PDF not found: ' + pdf_name)
    return copied


copy_pdfs_with_titles(os.getcwd())
Post a Comment for "How Read Contents Of Txt Files In Different Directories And Rename Other Files According To"