# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each html table row (TR),
# print the <TD> elements separated by colons.
#
# Overview: physical lines are accumulated in the pattern space until a
# complete row (<TR> ... </TR>) is present, then the markup is removed and
# each <TD> boundary becomes a ':'.  A literal '$' is used as a temporary
# end-of-row marker, so input that already contains '$' can confuse the
# joining rules.
#
# This could break on HTML comments.
#
:top
#                               Strip ^Ms (carriage returns).
#                               NOTE(review): \r is an escape understood by
#                               GNU sed; confirm the target sed accepts it,
#                               otherwise use a literal carriage return here.
s/\r//g
#                               Join all lines with unterminated HTML tags
#                               (a '<' with no '>' before end of line).
/<[^>]*$/{
    N
    b top
}
#                               Replace all </TR> with EOL tag ('$')
s;</[Tt][Rr]>;$;g
#                               Join lines with only <TR>
#                               (pattern space ends in a <TR ...> tag).
/<[Tt][Rr][^>]*>$/{
    N
    s/\n//g
    b top
}
#                               Also, join all lines starting with <TR>.
#                               The regex matches any tag whose name begins
#                               with T or R that has no EOL marker after it,
#                               i.e. the row is not finished yet.
/<[TtRr][^>]*>[^$]*$/{
    N
    s/\n//g
    b top
}
#                               Remove EOL markers
s/\$$//
#                               Remove lines not containing a <TR> tag
/<[Tt][Rr][^>]*>/!d
#                               Replace all <TD> with colon, swallowing the
#                               blanks around the tag
s/[[:blank:]]*<[Tt][Dd][^>]*> */:/g
#                               Strip all HTML tags
s/<[^>]*>//g
#                               Handle HTML characters
s/&nbsp;/ /g
#                               Compress spaces/tabs
s/[[:blank:]][[:blank:]]*/ /g
#                               Strip leading colons (only the first ':' is
#                               removed; it is the one produced by the first
#                               <TD> of the row)
s/://
#                               Strip leading/trailing whitespace
s/^ *//
s/ $//
#                               Strip HTML comments (blank out lines that
#                               still begin with "--" after tag removal)
s/^--.*$//