|
将替换的文字写入新的PDF
- import fitz
- import string
# Source PDF whose text spans are read and re-typeset.
doc = fitz.open("study_physics_inUSTC.pdf")
# Calling open() with no arguments creates a new, empty PDF to write into.
newdoc = fitz.open()
def filter_unicode_range(str):
    """Return *str* with every character outside the allowed set removed.

    A character is kept when any of the following holds:

    * ``char.isdigit()`` is true,
    * it is ASCII punctuation (``string.punctuation``),
    * it is one of ``。``, ``、`` or ``—``,
    * it lies in U+4E00..U+9FFF or in U+FF00..U+FFEF.

    Everything else (ASCII letters, whitespace, ...) is dropped. The
    relative order of the kept characters is preserved.
    """
    # Build the membership set once per call instead of scanning the
    # punctuation string and comparing three literals for every character.
    kept_marks = set(string.punctuation) | {"。", "、", "—"}
    # A single join avoids repeated string concatenation inside a loop.
    return "".join(
        char
        for char in str
        if char.isdigit()
        or char in kept_marks
        or "\u4E00" <= char <= "\u9FFF"
        or "\uFF00" <= char <= "\uFFEF"
    )
# Rebuild every page of `doc` in `newdoc`, re-typesetting the filtered text of
# each span with one built-in CJK font at the span's original position.
font = fitz.Font('china-s')
for page in doc:
    # Give the new page the source page's dimensions so that span origins,
    # which are expressed in source-page coordinates, land in the same place
    # (new_page() would otherwise default to A4).
    newpage = newdoc.new_page(width=page.rect.width, height=page.rect.height)
    # One writer per colour: spans are sorted into black or red below.
    tw_black = fitz.TextWriter(newpage.rect, color=(0, 0, 0))
    tw_red = fitz.TextWriter(newpage.rect, color=(1.0, 0.14901960784313725, 0.0))
    blocks = page.get_text("dict")["blocks"]
    for block in blocks:
        # Image blocks have no "lines" entry; indexing it would raise
        # KeyError, so treat a missing key as "no text lines".
        for line in block.get("lines", ()):
            for span in line["spans"]:
                text = span["text"]
                # Colour value 0 is written in black; every other colour
                # is written in red.
                tw = tw_black if span["color"] == 0 else tw_red
                tw.append(
                    span["origin"],
                    filter_unicode_range(text),
                    font=font,
                    fontsize=span["size"],
                )
    # Flush both writers onto the page once all of its spans are collected.
    tw_black.write_text(newpage)
    tw_red.write_text(newpage)
# Subset the embedded fonts before saving.
newdoc.subset_fonts()
newdoc.save("output.pdf", clean=True)
复制代码
得到的 output.pdf 文件大小约为 4 MB。
再用 iLovePDF 的 Extreme Compression 模式压缩后，文件大小降为 176 KB：
output_compressed.pdf
(175.99 KB, 下载次数: 0)
|
|