| 65 | | # TODO: Instead of parsing XML, you can probably get more direct access |
|---|
| 66 | | # to the document nodes (e.g., in a dictionary) |
|---|
| 67 | | # but the documentation on this is incomplete as of this writing: |
|---|
| 68 | | # http://docutils.sourceforge.net/docs/dev/hacking.html#the-node-interface |
|---|
| 69 | | xmldoc = publish_string(result, writer_name='xml', |
|---|
| 70 | | settings_overrides = {'input_encoding': 'utf-8', |
|---|
| 71 | | 'output_encoding': 'utf-8'}) |
|---|
| 72 | | xmldoc = minidom.parseString(xmldoc) |
|---|
| 73 | | x=xmldoc.childNodes[2].childNodes |
|---|
| 74 | | # the first 3 child Nodes seem to be the ones that contain doc metadata we need, |
|---|
| 75 | | # too bad there's no uniformity to how the information (i.e. field data) is stored |
|---|
| 76 | | title = x[0].childNodes[0].nodeValue |
|---|
| 77 | | # stuff from the field list |
|---|
| 78 | | author = x[1].getElementsByTagName('author')[0].childNodes[0].nodeValue |
|---|
| 79 | | date = x[1].getElementsByTagName('date')[0].childNodes[0].nodeValue |
|---|
| 80 | | # theoretically from field list too, but docutils treats it separately |
|---|
| 81 | | abstract = x[2].childNodes[1].childNodes[0].nodeValue |
|---|
| 82 | | # TODO: Keywords support -- a field list item not supported by docutils |
|---|
| 83 | | |
|---|
| 84 | | # Now remove the abstract from the reST doc, so that it's not re-processed |
|---|
| 85 | | # and placed wrongly in the LaTeX doc (NOTE for later: leave it alone for HTML |
|---|
| 86 | | # processing) |
|---|
| 87 | | regexp = re.compile(":Abstract:.*\n") |
|---|
| 88 | | result = re.sub(regexp, "", result) |
|---|
| 89 | | |
|---|
| 90 | | # write new file |
|---|
| 91 | | orig_filepath_noext = orig_file.split('.')[0] |
|---|
| 92 | | new_file = orig_filepath_noext + "_preprocessed.rst" |
|---|
| 93 | | processed_rst = _write_file(result, new_file) |
|---|
| 94 | | lchar_orig_path = orig_filepath_noext.rfind('/') |
|---|
| 95 | | # python doesn't have a function to get the directory of a given file? |
|---|
| 96 | | if lchar_orig_path > -1: |
|---|
| 97 | | orig_path = orig_filepath_noext[:lchar_orig_path] |
|---|
| 98 | | orig_filename_noext = orig_filepath_noext[lchar_orig_path+1:] |
|---|
| 99 | | else: |
|---|
| 100 | | orig_path = "./" |
|---|
| 101 | | orig_filename_noext = orig_filepath_noext |
|---|
| 102 | | |
|---|
| 103 | | # do the latex conversion and write the latex file |
|---|
| 104 | | latex = publish_string(result, writer_name='latex', |
|---|
| 105 | | settings_overrides = {'input_encoding': 'utf-8', |
|---|
| 106 | | 'output_encoding': 'utf-8'}) |
|---|
| 107 | | |
|---|
| 108 | | # UGLY HACK: replace the uselessly complex preamble that docutils gives |
|---|
| 109 | | #regexp = re.compile("\\\documentclass.*\\raggedbottom", re.DOTALL) |
|---|
| 110 | | #latex = re.sub(regexp, "", latex) |
|---|
| 111 | | #begin=latex.find("\\begin{document}") |
|---|
| 112 | | begin = latex.find("\setlength{\locallinewidth}{\linewidth}") |
|---|
| 113 | | begin = begin + 39 |
|---|
| 114 | | # print "%###################################" |
|---|
| 115 | | latex=latex[begin:] |
|---|
| 116 | | #latex=latex.replace("\setlength{\locallinewidth}{\linewidth}", "%\setlength{\locallinewidth}{\linewidth}") |
|---|
| 117 | | |
|---|
| 118 | | # setting the linespacing by finding docutils-placed section breaks |
|---|
| 119 | | # seems to be the best option, in order not to do it prematurely |
|---|
| 120 | | regexp = re.compile("\%______.*\n") |
|---|
| 121 | | latex = re.sub(regexp, "%______SECTION BREAK_______\n\setstretch{" \ |
|---|
| 122 | | + options.linespacing + "}\n", latex) |
|---|
| 123 | | |
|---|
| 124 | | # END NOTES? |
|---|
| 125 | | # abfnNrtuUvox -- need double slashes |
|---|
| 126 | | if options.endnotes: |
|---|
| 127 | | endnotes = "\\usepackage{endnotes}\n" |
|---|
| 128 | | # Unnecessary?: \let\\footnote=\endnote\n |
|---|
| 129 | | header = header.replace('$HEADER_OPTIONS', endnotes) |
|---|
| 130 | | endnotes_footer = "\\theendnotes\n\\newpage\n" |
|---|
| 131 | | else: |
|---|
| 132 | | header = header.replace('$HEADER_OPTIONS', "") |
|---|
| 133 | | endnotes_footer="" |
|---|
| 134 | | if options.nocite: |
|---|
| 135 | | nocite = "\\nocite{*}\n" |
|---|
| 136 | | else: |
|---|
| 137 | | nocite = "" |
|---|
| 138 | | |
|---|
| 139 | | # do footer, with bibliography |
|---|
| 140 | | latex = latex.replace("\\end{document}", "\\newpage\n\\setstretch{1}\n" \ |
|---|
| 141 | | "%s%s\\bibliographystyle{%s}\n\\bibliography{%s}\n" \ |
|---|
| 142 | | "\\end{document}\n" % (endnotes_footer, nocite, options.bibstyle, options.bibdb)) |
|---|
| 143 | | # NOTE: docutils snapshot has use-bibtex option, not used here. |
|---|
| 144 | | |
|---|
| 145 | | # get rid of space between end of sentence and footnote |
|---|
| 146 | | latex = latex.replace(" \\footnote", "\\footnote") |
|---|
| 147 | | |
|---|
| 148 | | header = header.replace('$TITLE', unicode(title)) |
|---|
| 149 | | header = header.replace('$AUTHOR', unicode(author)) |
|---|
| 150 | | header = header.replace('$DATE', unicode(date)) |
|---|
| 151 | | header = header.replace('$ABSTRACT', unicode(abstract)) |
|---|
| 152 | | # TODO: REMOVE the docutils-generated abstract :( |
|---|
| 153 | | |
|---|
| 154 | | latex = header.encode('utf8') + latex |
|---|
| 155 | | new_file = orig_filepath_noext + ".tex" |
|---|
| 156 | | processed_latex = _write_file(latex, new_file) |
|---|
| 157 | | |
|---|
| 158 | | os.chdir(orig_path) |
|---|
| 159 | | # now do the latex / bibtex stuff |
|---|
| 160 | | os.system('latex ' + orig_filename_noext) # + ' > /tmp/silent') |
|---|
| 161 | | os.system('bibtex ' + orig_filename_noext) # + ' > /tmp/silent') |
|---|
| 162 | | os.system('latex ' + orig_filename_noext) # + ' > /tmp/silent') |
|---|
| 163 | | os.system('latex ' + orig_filename_noext) # + ' > /tmp/silent') |
|---|
| 164 | | # and convert to PDF |
|---|
| 165 | | os.system('dvipdfm ' + orig_filename_noext + ".dvi") # + ' > /tmp/silent') |
|---|
| 166 | | |
|---|
| 167 | | |
|---|
def _write_file(str, filename):
    """Write a string to ``filename``, replacing any existing file.

    Args:
        str: Content to write. The name shadows the builtin ``str``; it is
            kept so existing callers are unaffected, which is why the
            builtin is never called inside this function.
        filename: Path of the file to create or truncate.

    Returns:
        True once the content has been written and the file closed.

    If the file cannot be opened for writing, an error message naming the
    file is written to standard output and the process exits with status 1.
    """
    try:
        file_obj = open(filename, 'w')
    except IOError:
        # Report the path that actually failed. abspath() resolves relative
        # names against the current directory and leaves absolute ones
        # untouched. stdout.write (rather than a print statement) keeps
        # this block valid on both Python 2 and Python 3.
        sys.stdout.write("*** ERROR: Can not write to file "
                         + os.path.abspath(filename) + "\n")
        sys.exit(1)
    try:
        file_obj.write(str)
    finally:
        # Release the handle even if the write itself fails.
        file_obj.close()
    return True
|---|
| 178 | | |
|---|
| 179 | | def _recurse_nodes(nodes, level): |
|---|
| 180 | | # currently not used, but useful for visualizing the DOM |
|---|
| 181 | | # after processing a reST doc, but before publishing it |
|---|
| 182 | | # NOTE: could be useful to turn items into a dictionary |
|---|
| 183 | | for i in nodes: |
|---|
| 184 | | print str(level) + " " + str(i.nodeName) + " = " + unicode(i.nodeValue) |
|---|
| 185 | | #if i.nodeType == 3: # TEXT_NODE -- for viewing the values b/w opening and closing tags |
|---|
| 186 | | #print str(level) + " " + i.parentNode.nodeName + " = " + unicode(i.nodeValue) |
|---|
| 187 | | if i.hasChildNodes(): |
|---|
| 188 | | _recurse_nodes(i.childNodes, level + 1) |
|---|
| 189 | | |
|---|
| 190 | | |
|---|