using System;
using System.Globalization;
using System.Text.RegularExpressions;
using HtmlAgilityPack;

/// <summary>
/// A postal address split into street, city, state and ZIP code components.
/// </summary>
public class Address
{
    public string Street { get; set; }
    public string City { get; set; }
    public string State { get; set; }
    public string ZipCode { get; set; }
}

/// <summary>
/// A single vacancy scraped from the printable vacancy-details page on statejobs.ny.gov.
/// </summary>
/// <remarks>
/// Values are read positionally from the page's <c>span</c> elements with class
/// <c>rightCol</c>; a property whose span is absent from the page is left <c>null</c>.
/// Text is taken from <c>InnerHtml</c>, so it may still contain markup and HTML entities.
/// </remarks>
public class Posting
{
    private const string DetailsUrlPrefix = "https://statejobs.ny.gov/employees/vacancyDetailsPrint.cfm?id=";

    // Format of the dates picked out by DatePattern.
    // NOTE(review): month-first ordering is assumed from the site being a US one - confirm.
    private const string DateFormat = "MM/dd/yy";

    // Built once instead of on every construction.
    private static readonly Regex DatePattern = new Regex(@"\d\d/\d\d/\d\d");

    public string Agency { get; set; }
    public string Title { get; set; }
    public string OccupationalCategory { get; set; }
    public string SalaryGrade { get; set; }
    public string BargainingUnit { get; set; }
    public string SalaryRange { get; set; }
    public string EmploymentType { get; set; }
    public string AppointmentType { get; set; }
    public string JurisdictionalClass { get; set; }
    public string TravelPercentage { get; set; }
    public string MinimumQualifications { get; set; }
    public string DutiesDescription { get; set; }
    public string ContactName { get; set; }
    public string ContactEmailAddress { get; set; }
    public Address LocationAddress { get; set; }
    public Address ContactAddress { get; set; }
    public string NotesOnApplying { get; set; }
    public string BoxNumber { get; set; }
    public string VacancyID { get; set; }
    public DateTime DatePosted { get; set; }
    public DateTime DateDue { get; set; }

    /// <summary>
    /// Downloads the vacancy-details page for <paramref name="id"/> and populates this
    /// instance from it. The requested URL and the parsed dates are written to the console.
    /// </summary>
    /// <param name="id">Vacancy identifier; appended verbatim to the details URL.</param>
    /// <exception cref="InvalidOperationException">
    /// The page has no paragraph under the <c>noNavContent</c> element, or that paragraph
    /// contains fewer than two dates.
    /// </exception>
    /// <exception cref="FormatException">A matched date is not a valid calendar date.</exception>
    public Posting(string id)
    {
        string fullUrl = DetailsUrlPrefix + id;
        Console.WriteLine(fullUrl);

        HtmlWeb web = new HtmlWeb();
        HtmlDocument htmlDoc = web.Load(fullUrl);

        this.LocationAddress = new Address();
        this.ContactAddress = new Address();

        ReadDates(htmlDoc, fullUrl);
        Console.WriteLine($"Due: {this.DateDue}, posted: {this.DatePosted}");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection valueSpans = htmlDoc.DocumentNode.SelectNodes("//span[@class=\"rightCol\"]");
        if (valueSpans != null)
        {
            int position = 0;
            foreach (HtmlNode span in valueSpans)
            {
                AssignField(position, span.InnerHtml.Trim());
                position++;
            }
        }

        // InnerHtml is raw markup, so an ampersand arrives as the "&amp;" entity.
        // Null-conditional: the span may have been missing from the page.
        this.BargainingUnit = this.BargainingUnit?.Replace("&amp;", "&");
        this.VacancyID = id;
    }

    // Reads the first two dates in the first paragraph of #noNavContent:
    // the first becomes DatePosted, the second DateDue.
    private void ReadDates(HtmlDocument htmlDoc, string fullUrl)
    {
        HtmlNode datesNode = htmlDoc.DocumentNode.SelectSingleNode("//*[@id=\"noNavContent\"]/p");
        if (datesNode == null)
        {
            throw new InvalidOperationException(
                "Vacancy page has no paragraph under 'noNavContent': " + fullUrl);
        }

        MatchCollection matches = DatePattern.Matches(datesNode.InnerHtml);
        if (matches.Count < 2)
        {
            throw new InvalidOperationException(
                "Expected two dates (posted, due) on vacancy page but found "
                + matches.Count + ": " + fullUrl);
        }

        // Explicit format + invariant culture so the result does not depend on the
        // machine's regional settings.
        this.DatePosted = DateTime.ParseExact(matches[0].Value, DateFormat, CultureInfo.InvariantCulture);
        this.DateDue = DateTime.ParseExact(matches[1].Value, DateFormat, CultureInfo.InvariantCulture);
    }

    // Stores the text of the rightCol span at the given zero-based position into the
    // matching property. Positions not listed here are not captured.
    private void AssignField(int position, string value)
    {
        switch (position)
        {
            case 0: this.Agency = value; break;
            case 1: this.Title = value; break;
            case 2: this.OccupationalCategory = value; break;
            case 3: this.SalaryGrade = value; break;
            case 4: this.BargainingUnit = value; break;
            case 5: this.SalaryRange = value; break;
            case 6: this.EmploymentType = value; break;
            case 7: this.AppointmentType = value; break;
            case 8: this.JurisdictionalClass = value; break;
            case 9: this.TravelPercentage = value; break;
            case 19: this.LocationAddress.Street = value; break;
            case 21: this.LocationAddress.City = value; break;
            case 22: this.LocationAddress.State = value; break;
            case 23: this.LocationAddress.ZipCode = value; break;
            case 24: this.MinimumQualifications = value; break;
            case 25: this.DutiesDescription = value; break;
            case 27: this.ContactName = value; break;
            case 30: this.ContactEmailAddress = value; break;
            case 31: this.ContactAddress.Street = value; break;
            case 33: this.ContactAddress.City = value; break;
            case 34: this.ContactAddress.State = value; break;
            case 35: this.ContactAddress.ZipCode = value; break;
            case 36: this.NotesOnApplying = value; break;
            default: break;
        }
    }
}