// Extracts the text of every page of "xxx.pdf", strips ASCII spaces from it,
// echoes it to the console, and writes it to "处理结果.txt".
var pdfReader = new PdfReader("xxx.pdf");
try
{
    // The using statement flushes and closes the writer (and the underlying
    // FileStream) even if extraction throws part-way through the document.
    using (var output = new StreamWriter(new FileStream("处理结果.txt", FileMode.Create)))
    {
        int pageCount = pdfReader.NumberOfPages;

        // The loop addresses pages with 1-based numbers: 1..pageCount inclusive.
        for (int pg = 1; pg <= pageCount; pg++)
        {
            // A fresh strategy instance is created for every page.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            var value = PdfTextExtractor.GetTextFromPage(pdfReader, pg, strategy);

            // Remove every ASCII space character from the extracted text.
            value = value.Replace(" ", "");

            Console.WriteLine(value);
            output.Write(value);
        }

        output.Flush();
    }
}
finally
{
    // Release the source PDF regardless of whether extraction succeeded.
    pdfReader.Close();
}

Console.Write("处理完毕");

// Wait for Enter before continuing so the console output can be read.
Console.ReadLine();
该方法读出的汉字不会乱码。