// Command main renders the Amtsblatt index page of rottenacker.de in headless
// Chromium, collects every link on it, and downloads each linked file into the
// current working directory with a "mitteilungsblatt_" file name prefix.
// Files that already exist locally are skipped.
package main

import (
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"path"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/chromedp/chromedp"
)

const baseURI = "https://www.rottenacker.de/"

const (
	// indexPath is the page, relative to baseURI, whose links are crawled.
	indexPath = "cgi-seiten/amtsblatt.htm"
	// filePrefix is prepended to the name of every downloaded file.
	filePrefix = "mitteilungsblatt_"
	// partialSuffix marks a download that has not been completed yet.
	partialSuffix = ".part"

	// renderTimeout bounds starting Chromium and rendering the index page.
	renderTimeout = 2 * time.Minute
	// downloadTimeout bounds a single file download, including reading the body.
	downloadTimeout = 5 * time.Minute
)

// httpClient is shared by all downloads; unlike http.DefaultClient it has a
// timeout, so a stalled server cannot block the program forever.
var httpClient = &http.Client{Timeout: downloadTimeout}

func main() {
	// All work happens in run so that its deferred cleanups (which shut down
	// Chromium) execute before log.Fatal terminates the process.
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run renders the index page, extracts its links, and downloads every linked
// file. A failed download is logged and does not stop the remaining ones; run
// only returns an error when the page itself cannot be rendered or parsed.
func run() error {
	// Locate the Chromium binary (the original setup targets a Raspberry Pi).
	execPath, err := exec.LookPath("chromium-browser")
	if err != nil {
		return fmt.Errorf("Chromium binary not found: %w", err)
	}

	pageURL, err := url.Parse(baseURI + indexPath)
	if err != nil {
		return fmt.Errorf("parsing index URL: %w", err)
	}

	html, err := renderPage(execPath, pageURL.String())
	if err != nil {
		return err
	}

	// Parse the HTML content using goquery.
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return fmt.Errorf("parsing rendered HTML: %w", err)
	}

	doc.Find("a").Each(func(_ int, s *goquery.Selection) {
		href, exists := s.Attr("href")
		if !exists {
			return
		}
		fileURL, name, ok := resolveLink(pageURL, href)
		if !ok {
			return
		}
		if err := downloadFile(fileURL, filePrefix+name); err != nil {
			log.Printf("downloading %q: %v", fileURL, err)
		}
	})
	return nil
}

// renderPage loads pageURL in headless Chromium started from execPath, waits
// until the body element is visible, and returns the inner HTML of the
// document element. The browser is shut down before renderPage returns.
func renderPage(execPath, pageURL string) (string, error) {
	// NOTE(review): "no-sandbox" and "ignore-certificate-errors" weaken the
	// browser's security; they are kept unchanged from the original setup.
	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.Flag("headless", true),
		chromedp.Flag("disable-gpu", true),
		chromedp.Flag("no-sandbox", true),
		chromedp.Flag("ignore-certificate-errors", true),
		chromedp.ExecPath(execPath),
	)

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
	defer cancelAlloc()

	ctx, cancel := chromedp.NewContext(allocCtx)
	defer cancel()

	// Without a deadline a page that never finishes loading would hang forever.
	ctx, cancelTimeout := context.WithTimeout(ctx, renderTimeout)
	defer cancelTimeout()

	var html string
	err := chromedp.Run(ctx,
		chromedp.Navigate(pageURL),
		chromedp.WaitVisible("body", chromedp.ByQuery),
		chromedp.Evaluate(`document.documentElement.innerHTML`, &html),
	)
	if err != nil {
		return "", fmt.Errorf("rendering %s: %w", pageURL, err)
	}
	return html, nil
}

// resolveLink turns the href of an anchor found on page into an absolute
// download URL and a local file name (the last, still percent-encoded, path
// segment). ok is false for links that cannot name a downloadable file: empty
// or fragment-only hrefs, unparsable hrefs, non-HTTP(S) schemes such as
// "mailto:" or "javascript:", and URLs whose path is empty or ends in "/".
func resolveLink(page *url.URL, href string) (fileURL, name string, ok bool) {
	href = strings.TrimSpace(href)
	if href == "" || strings.HasPrefix(href, "#") {
		return "", "", false
	}

	ref, err := url.Parse(href)
	if err != nil {
		return "", "", false
	}

	// Resolve like a browser would: relative to the page, not to the site root.
	target := page.ResolveReference(ref)
	if target.Scheme != "http" && target.Scheme != "https" {
		return "", "", false
	}
	if target.Path == "" || strings.HasSuffix(target.Path, "/") {
		return "", "", false
	}

	// Use the escaped path so an encoded "/" (%2F) can never end up in the
	// file name as a real path separator.
	name = path.Base(target.EscapedPath())
	target.Fragment = ""
	return target.String(), name, true
}

// downloadFile fetches url and stores the response body at filepath.
//
// If filepath already exists the download is skipped and nil is returned. The
// body is first written to filepath+".part" and renamed only after it has been
// received completely, so an interrupted download never leaves a truncated
// file that later runs would mistake for a finished one. An error is returned
// when the request fails, the status is not 200 OK, or the file cannot be
// written.
func downloadFile(url string, filepath string) error {
	if fileExists(filepath) {
		fmt.Printf("File %s already exists. Skipping download.\n", filepath)
		return nil
	}

	response, err := httpClient.Get(url)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("failed to download file, status code: %d", response.StatusCode)
	}

	// The file name is used exactly as passed in. Disabled sketch for deriving
	// an extension from the Content-Type header instead:
	//
	//	contentType := response.Header.Get("Content-Type")
	//	ext := ".txt" // Default extension if Content-Type is not present or not recognized
	//	if strings.Contains(contentType, "pdf") {
	//		ext = ".pdf"
	//	} else if strings.Contains(contentType, "jpeg") {
	//		ext = ".jpeg"
	//	}
	//	// Add more cases for other file types as needed

	if err := writeFileAtomic(filepath, response.Body); err != nil {
		return fmt.Errorf("saving %s: %w", filepath, err)
	}

	fmt.Println("File downloaded successfully!")
	return nil
}

// writeFileAtomic copies src into dst via a temporary dst+".part" file that is
// renamed to dst once everything has been written and closed. On any failure
// the temporary file is removed and dst is left untouched.
func writeFileAtomic(dst string, src io.Reader) (err error) {
	partial := dst + partialSuffix

	file, err := os.Create(partial)
	if err != nil {
		return err
	}
	defer func() {
		if err != nil {
			// Best-effort cleanup; the error already being returned is the
			// one the caller needs to see.
			_ = os.Remove(partial)
		}
	}()

	if _, err = io.Copy(file, src); err != nil {
		// The copy error takes precedence over any error from Close.
		_ = file.Close()
		return err
	}
	if err = file.Close(); err != nil {
		return err
	}
	return os.Rename(partial, dst)
}

// fileExists reports whether filepath should be treated as already present.
// It returns false only when os.Stat reports that the path does not exist;
// any other outcome, including a stat error such as permission denied, yields
// true so that the caller does not try to overwrite something it cannot
// inspect.
func fileExists(filepath string) bool {
	_, err := os.Stat(filepath)
	return !errors.Is(err, fs.ErrNotExist)
}